def _create_or_update_organization(self, data_dict, harvest_job):
        """Return the CKAN organization for ``data_dict``, creating or updating it.

        The organization is looked up by ``data_dict['id']``.  When it exists
        and the last finished harvest job is older than
        ``data_dict['changed']``, its title and name are refreshed from
        ``data_dict['name']``.  When ``NotFound`` is raised anywhere in the
        lookup/update step, the organization is created instead.

        :param data_dict: source record; the keys ``id``, ``name`` and
                          ``changed`` are read here
        :param harvest_job: job passed on to ``self._last_finished_job``
        :returns: the organization dict returned by the last CKAN action run
        """

        context = {
            'model': model,
            'session': model.Session,
            'user': self._get_user_name(),
            'ignore_auth': True,
        }

        try:
            log.info("Finding organization..")
            log.info(data_dict['id'])
            org = p.toolkit.get_action('organization_show')(context, {'id': data_dict['id']})
            # The message needs a placeholder, otherwise the organization
            # passed as a logging argument never ends up in the log output.
            log.info("found %s", org)

            last_finished_job = self._last_finished_job(harvest_job)
            log.info(last_finished_job)
            # Only touch the organization when the source changed after the
            # previous finished job.
            if last_finished_job and last_finished_job < data_dict['changed']:
                log.info("updating organization")
                org = p.toolkit.get_action('organization_update')(context, {'title': data_dict['name'],
                                                                      'name': munge_title_to_name(data_dict['name']),
                                                                      'id': data_dict['id']})
        except NotFound:
            log.info("Organization %s not found, creating...", data_dict['name'])

            # Get rid of auth audit on the context otherwise we'll get an
            # exception
            context.pop('__auth_audit', None)

            org = p.toolkit.get_action('organization_create')(context, {'title': data_dict['name'],
                                                                        'name': munge_title_to_name(data_dict['name']),
                                                                        'id': data_dict['id']})
            log.info(org)

        return org
    def do_publisher(cls, publisher_nid):
        """Make the CKAN group of a publisher a child of its parent's group.

        The publisher details come from ``cls.get_cached_publisher_details``.
        Nothing is changed when the publisher or its parent has no group in
        the CKAN database, or when the group already has any active parent;
        each outcome is recorded through ``cls.status.record``.

        :param publisher_nid: identifier handed to
                              ``cls.get_cached_publisher_details``
        :returns: None
        """
        from ckan import model
        from ckan.lib.munge import munge_title_to_name

        log = global_log

        pub = cls.get_cached_publisher_details(publisher_nid)

        title = pub["title"].strip()

        slug = munge_title_to_name(title)
        g = model.Group.get(slug)
        if g:
            log.info("Found publisher in db: %s", g.name)
        else:
            cls.status.record("Not found in CKAN db", slug, do_print=False)
            log.warning("Ignoring publisher that cannot be found in db: %s", slug)
            return

        if pub.get("parent_node"):
            parent_pub_title = cls.get_cached_publisher_details(pub["parent_node"])["title"]
            parent_name = munge_title_to_name(parent_pub_title)
            parent = model.Group.get(parent_name)
            if not parent:
                cls.status.record("Cannot find parent in CKAN db", g.name, do_print=False)
                # ``pub`` is accessed like a dict everywhere else, so it has
                # no ``name`` attribute; report the child group's name.
                log.warning("Cannot find parent %s of %s", parent_name, g.name)
                return

            # Groups that already hold an active membership row for this group.
            existing_parents = [
                m.group
                for m in model.Session.query(model.Member)
                .filter(model.Member.table_name == "group")
                .filter(model.Member.table_id == g.id)
                .filter(model.Member.state == "active")
            ]
            if existing_parents:
                existing_names = [existing.name for existing in existing_parents]
                if len(existing_parents) > 1:
                    log.warning("Multiple parents for %s: %r", g.name, existing_names)
                if parent in existing_parents:
                    cls.status.record("Correct parent already", g.name, do_print=False)
                    log.info("Correct parent already: %s parent of %s", parent.name, g.name)
                    return
                else:
                    # A different parent is left in place rather than replaced.
                    cls.status.record("Has another parent", g.name, do_print=False)
                    log.info(
                        "Has another parent: %r (instead of %s) parent of %s",
                        existing_names,
                        parent.name,
                        g.name,
                    )
                    return

            m = model.Member(group=parent, table_id=g.id, table_name="group")
            model.Session.add(m)
            model.Session.commit()
            cls.status.record("Parent added", slug, do_print=False)
            log.info("%s is made parent of %s", parent.name, g.name)
        else:
            log.info("%s has no parent in Drupal", g.name)
            cls.status.record("Has no parent in Drupal", g.name, do_print=False)
    def add_publisher(cls, publisher_nid):
        """Create or refresh the CKAN group for a publisher and link its parent.

        Publisher details come from ``cls.get_cached_publisher_details``.  The
        group is looked up by the munged publisher title, created when
        missing, and its title, type, description and extras are overwritten.
        When the publisher has a ``parent_node`` the parent group is looked
        up (and created recursively when missing) and a membership row is
        added unless one already exists.

        :param publisher_nid: publisher identifier; publishers listed in
                              ``ignore_publishers`` are skipped
        :returns: the group object, or None when the publisher is ignored
        """
        from ckan import model
        from ckan.lib.munge import munge_title_to_name

        if int(publisher_nid) in ignore_publishers:
            global_log.info('Publisher ignored: %s (%s)', publisher_nid,
                            cls.get_cached_publisher_details(publisher_nid))
            return

        pub = cls.get_cached_publisher_details(publisher_nid)

        title = pub['title'].strip()

        slug = munge_title_to_name(title)
        g = model.Group.get(slug)
        if g:
            global_log.info('Publisher already exists in db: %s', slug)
        else:
            g = model.Group(name=slug)
            model.Session.add(g)

        g.title=title
        g.type='publisher'
        g.description=pub['body']
        field_pub_web_title = pub['field_pub_web'][0]['title'] if pub['field_pub_web'] else ''
        g.extras['contact-name'] = '%s contact' % field_pub_web_title if field_pub_web_title else ''
        g.extras['contact-email'] = pub['field_pub_email_display'][0]['email'] if pub['field_pub_email_display'] else ''
        g.extras['contact-phone'] = ''
        g.extras['foi-name'] = ''
        g.extras['foi-email'] = ''
        g.extras['foi-web'] = ''
        g.extras['foi-phone'] = ''
        acronym = pub['field_acronym'][0]['value'] if pub['field_acronym'] else ''
        g.extras['abbreviation'] = acronym or ''
        g.extras['website-url'] = (pub['field_pub_web'][0]['url'] or '') if pub['field_pub_web'] else ''
        g.extras['website-name'] = (pub['field_pub_web'][0]['title'] or '') if pub['field_pub_web'] else ''
        model.Session.commit()
        title_and_abbreviation = '%s (%s)' % (title, acronym) if acronym else title
        global_log.info('Added/edited publisher: %s <%s>', title_and_abbreviation, publisher_nid)

        if pub.get('parent_node'):
            parent_pub_title = cls.get_cached_publisher_details(pub['parent_node'])['title']
            parent = model.Group.get(munge_title_to_name(parent_pub_title))
            if not parent:
                parent = cls.add_publisher(pub['parent_node'])

            if not parent:
                # The recursive call returns None for ignored publishers.
                # Without this guard a Member with group=None would be staged
                # and ``parent.name`` below would raise AttributeError.
                global_log.warning('Parent %s of %s is not available; parent link skipped',
                                   pub['parent_node'], g.name)
                return g

            if model.Session.query(model.Member).\
                filter(model.Member.group==parent).\
                filter(model.Member.table_id==g.id).count() == 0:
                m = model.Member(group=parent, table_id=g.id, table_name='group')
                model.Session.add(m)
                global_log.info('%s is parent of %s', parent.name, g.name)
            else:
                global_log.info('%s is already a parent of %s', parent.name, g.name)
            model.Session.commit()

        return g
    def do_publisher(cls, publisher_nid):
        '''Make the CKAN group of a publisher a child of its parent's group.

        The publisher details come from ``cls.get_cached_publisher_details``.
        Nothing is changed when the publisher or its parent has no group in
        the CKAN database, or when the group already has any active parent;
        each outcome is recorded through ``cls.status.record``.

        :param publisher_nid: identifier handed to
                              ``cls.get_cached_publisher_details``
        :returns: None
        '''
        from ckan import model
        from ckan.lib.munge import munge_title_to_name
        log = global_log

        pub = cls.get_cached_publisher_details(publisher_nid)

        title = pub['title'].strip()

        slug = munge_title_to_name(title)
        g = model.Group.get(slug)
        if g:
            log.info('Found publisher in db: %s', g.name)
        else:
            cls.status.record('Not found in CKAN db', slug, do_print=False)
            log.warning('Ignoring publisher that cannot be found in db: %s', slug)
            return

        if pub.get('parent_node'):
            parent_pub_title = cls.get_cached_publisher_details(pub['parent_node'])['title']
            parent_name = munge_title_to_name(parent_pub_title)
            parent = model.Group.get(parent_name)
            if not parent:
                cls.status.record('Cannot find parent in CKAN db', g.name, do_print=False)
                # ``pub`` is accessed like a dict everywhere else, so it has
                # no ``name`` attribute; report the child group's name.
                log.warning('Cannot find parent %s of %s', parent_name, g.name)
                return

            # Groups that already hold an active membership row for this group.
            existing_parents = [m.group for m in model.Session.query(model.Member).\
                                filter(model.Member.table_name=='group').\
                                filter(model.Member.table_id==g.id).\
                                filter(model.Member.state=='active')]
            if existing_parents:
                existing_names = [existing.name for existing in existing_parents]
                if len(existing_parents) > 1:
                    log.warning('Multiple parents for %s: %r', g.name,
                                existing_names)
                if parent in existing_parents:
                    cls.status.record('Correct parent already',
                                       g.name, do_print=False)
                    log.info('Correct parent already: %s parent of %s',
                             parent.name, g.name)
                    return
                else:
                    # A different parent is left in place rather than replaced.
                    cls.status.record('Has another parent',
                                       g.name, do_print=False)
                    log.info('Has another parent: %r (instead of %s) parent of %s',
                             existing_names, parent.name, g.name)
                    return

            m = model.Member(group=parent, table_id=g.id, table_name='group')
            model.Session.add(m)
            model.Session.commit()
            cls.status.record('Parent added', slug, do_print=False)
            log.info('%s is made parent of %s', parent.name, g.name)
        else:
            log.info('%s has no parent in Drupal', g.name)
            cls.status.record('Has no parent in Drupal',
                               g.name, do_print=False)
    def _create_or_update_organization(self, data_dict, harvest_job):
        """Synchronise one source organization record with the CKAN catalog.

        A missing organization is created unless the record is flagged as
        removed.  An existing one is deleted when the record is flagged as
        removed, or patched when the record changed after the reference
        time (a fixed early date when ``force_all`` is configured, otherwise
        the last error-free job time).

        :param data_dict: source record; the keys ``id``, ``name``,
                          ``removed`` and ``changed`` are read here
        :param harvest_job: job passed to ``self._last_error_free_job_time``
        :returns: the organization dict, or None when the record is removed
        """

        def _org_fields():
            # Payload shared by the create and patch actions.
            return {
                'title': data_dict['name'],
                'name': munge_title_to_name(data_dict['name']),
                'id': data_dict['id'],
            }

        context = {
            'model': model,
            'session': model.Session,
            'user': self._get_user_name(),
            'ignore_auth': True,
        }

        try:
            org = p.toolkit.get_action('organization_show')(context, {'id': data_dict['id']})
        except NotFound:
            org = None

        if not org:
            log.info("Organization %s not found, creating...", data_dict['name'])

            if data_dict['removed']:
                log.info("Organization was removed, not creating..")
                return None

            # The auth audit entry must be dropped from the context,
            # otherwise the create action raises an exception.
            context.pop('__auth_audit', None)

            new_org = _org_fields()
            org = p.toolkit.get_action('organization_create')(context, new_org)
            log.info(org)
            return org

        log.info("found %s", org)

        if data_dict['removed']:
            log.info("Organization was removed, removing from catalog..")
            p.toolkit.get_action('organization_delete')(context, org)
            return None

        force_all = self.config.get('force_all', False) is True
        last_time = "2011-01-01" if force_all else self._last_error_free_job_time(harvest_job)
        if last_time and last_time < data_dict['changed']:
            changed_org = _org_fields()
            org = p.toolkit.get_action('organization_patch')(context, changed_org)

        return org
Exemple #6
0
    def _gen_new_name(cls, title, existing_name=None,
                      append_type='number-sequence'):
        '''
        Builds a URL friendly dataset 'name' out of the given title.

        The munged title has runs of dashes collapsed into one and is then
        handed to ``_ensure_name_is_unique``, which is responsible for
        making it unique.

        Pass the existing name when regenerating the name of a dataset whose
        title changed, in case the name doesn't need to change after all.

        :param existing_name: the current name of the dataset - only specify
                              this if the dataset exists
        :type existing_name: string
        :param append_type: the type of characters to add to make it unique -
                            either 'number-sequence' or 'random-hex'.
        :type append_type: string
        '''

        # collapse multiple dashes left behind by munging
        slug = re.sub('-+', '-', munge_title_to_name(title))
        return cls._ensure_name_is_unique(
            slug, existing_name=existing_name, append_type=append_type)
Exemple #7
0
    def _gen_new_name(cls, title, existing_name=None,
                      append_type=None):
        '''
        Builds a URL friendly dataset 'name' out of the given title.

        The munged title has runs of dashes collapsed into one and is then
        handed to ``_ensure_name_is_unique``, which is responsible for
        making it unique.

        Pass the existing name when regenerating the name of a dataset whose
        title changed, in case the name doesn't need to change after all.

        :param existing_name: the current name of the dataset - only specify
                              this if the dataset exists
        :type existing_name: string
        :param append_type: the type of characters to add to make it unique -
                            either 'number-sequence' or 'random-hex'. When
                            empty, the configured default is used.
        :type append_type: string
        '''

        # An explicit append_type wins; otherwise fall back to the configured
        # default, and to 'number-sequence' when nothing is configured.
        effective_append_type = append_type or config.get(
            'ckanext.harvest.default_dataset_name_append', 'number-sequence')

        # collapse multiple dashes left behind by munging
        slug = re.sub('-+', '-', munge_title_to_name(title))
        return cls._ensure_name_is_unique(
            slug, existing_name=existing_name,
            append_type=effective_append_type)
Exemple #8
0
def organization_import(data):
    """ Import organizations

    ``data`` is a JSON document with a ``url`` pointing at a JSON list of
    organizations and an optional ``public_organization`` flag.  Each list
    item is either a plain title string or a dict with ``name``, ``title``,
    an optional ``description`` and any further keys, which become extras.
    Organizations that already exist are left untouched.
    """
    _load_config()
    context = _create_context()
    configuration = simplejson.loads(data)
    data_url = configuration.get('url')
    public_organization = configuration.get('public_organization', False)

    # Read the whole payload first so the HTTP connection is released before
    # the CKAN actions run.
    with closing(urllib2.urlopen(data_url)) as source:
        items = simplejson.load(source)

    for item in items:
        values = {}
        if isinstance(item, basestring):
            values['title'] = item.strip()
            values['name'] = munge_title_to_name(values['title']).lower()
        else:
            values['name'] = item.pop('name')
            values['title'] = item.pop('title')
            values['description'] = item.pop('description', None)
            # Whatever is left on the item is stored as extras.
            values['extras'] = [{'key': key, 'value': value} for key, value in item.iteritems()]
        values['id'] = values['name']

        if public_organization:
            # Add the flag to the extras collected above instead of replacing
            # them; an item-supplied entry with the same key is dropped so
            # the key occurs only once.
            flag_key = 'public_adminstration_organization'
            extras = [extra for extra in values.get('extras', [])
                      if extra['key'] != flag_key]
            extras.append({'key': flag_key, 'value': 'true'})
            values['extras'] = extras
        try:
            get_action('organization_show')(context, {'id': values['id']})
            # Do not override organizations
        except NotFound:
            get_action('organization_create')(context, values)
    def make_package_name(self, title, exclude_existing_package):
        '''
        Derives a URL friendly package name from a title.

        When the name is taken by another package, either the current name
        of the package being updated is reused or a few random characters
        are appended.
        '''

        slug = munge_title_to_name(title).replace('_', '-')
        while '--' in slug:
            slug = slug.replace('--', '-')
        slug = slug[:90]  # max length is 100

        # Look for a *different* package already using this slug.
        clash = (Session.query(Package)
                 .filter(Package.name == slug)
                 .filter(Package.id != exclude_existing_package)
                 .first())
        if not clash:
            # Free to use. When updating an existing package this changes
            # its URL, so incoming links may break.
            return slug

        if exclude_existing_package:
            # The slug is taken and a package is being updated. Its current
            # name most likely already carries a random suffix from an
            # earlier run, so keep it instead of picking new random text
            # and changing the URL again.
            current = (Session.query(Package)
                       .filter(Package.id == exclude_existing_package)
                       .first())
            # The package may not exist yet: the desired GUID can be passed
            # in before the new package is instantiated.
            if current:
                return current.name

        # Five random characters are hoped to be enough to avoid a collision.
        random_suffix = str(uuid.uuid4())[:5]
        return slug + "-" + random_suffix
    def set_dataset_info(self, package, dataset, harvester_config):
        """Copy fields of one source dataset onto the CKAN package dict.

        Fixed agency/author values are written first, then the title, notes
        and url, then the values found under ``dataset["HealthData"]``, and
        finally the tags built from ``dataset["Keywords"]``.
        """
        extra(package, "Agency", "Department of Health & Human Services")
        package["author"] = "Centers for Medicare & Medicaid Services"
        extra(package, "author_id", "http://healthdata.gov/id/agency/cms")
        extra(package, "Bureau Code", "009:38")
        package["title"] = dataset["Name"].strip()
        package["notes"] = dataset.get("Description")

        package["url"] = dataset.get("Address")

        health_data = dataset["HealthData"]

        # Date values go through parsedate before being stored.
        for label, source_key in (
            ("Date Released", "DateReleased"),
            ("Date Updated", "DateUpdated"),
        ):
            extra(package, label, parsedate(health_data.get(source_key)))

        extra(package, "Agency Program URL", health_data.get("AgencyProgramURL"))
        extra(package, "Subject Area 1", "Medicare")

        # NOTE(review): "Coverage Period" and "Collection Frequency" are the
        # only source keys containing a space - confirm against the feed.
        # Contact name/email extras are not set here (previously disabled).
        for label, source_key in (
            ("Unit of Analysis", "UnitOfAnalysis"),
            ("Data Dictionary", "DataDictionaryURL"),
            ("Coverage Period", "Coverage Period"),
            ("Collection Frequency", "Collection Frequency"),
            ("Geographic Scope", "GeographicScope"),
            ("License Agreement", "DataLicenseAgreementURL"),
        ):
            extra(package, label, health_data.get(source_key))

        from ckan.lib.munge import munge_title_to_name
        keywords = dataset.get("Keywords", [])
        package["tags"] = [{"name": munge_title_to_name(keyword["Name"])} for keyword in keywords]
    def migrate(self):
        '''

        '''
        related_items = get_action('related_list')(data_dict={})

        # preflight:
        # related items must have unique titles before migration
        related_titles = [i['title'] for i in related_items]
        # make a list of duplicate titles
        duplicate_titles = self._find_duplicates(related_titles)
        if duplicate_titles:
            print(
                """All Related Items must have unique titles before migration. The following
Related Item titles are used more than once and need to be corrected before
migration can continue. Please correct and try again:"""
            )
            for i in duplicate_titles:
                print(i)
            return

        for related in related_items:
            existing_showcase = get_action('package_search')(
                data_dict={'fq': '+dataset_type:showcase original_related_item_id:{0}'.format(related['id'])})
            normalized_title = substitute_ascii_equivalents(related['title'])
            if existing_showcase['count'] > 0:
                print('Showcase for Related Item "{0}" already exists.'.format(
                    normalized_title))
            else:
                data_dict = {
                    'original_related_item_id': related.get('id'),
                    'title': related.get('title'),
                    'name': munge_title_to_name(related.get('title')),
                    'notes': related.get('description'),
                    'image_url': related.get('image_url'),
                    'url': related.get('url'),
                    'tags': [{"name": related.get('type').lower()}]
                }
                # make the showcase
                try:
                    new_showcase = get_action('ckanext_showcase_create')(
                        data_dict=data_dict)
                except Exception as e:
                    print('There was a problem migrating "{0}": {1}'.format(
                        normalized_title, e))
                else:
                    print('Created Showcase from the Related Item "{0}"'.format(normalized_title))

                    # make the showcase_package_association, if needed
                    try:
                        related_pkg_id = self._get_related_dataset(
                            related['id'])
                        if related_pkg_id:
                            get_action('ckanext_showcase_package_association_create')(
                                data_dict={'showcase_id': new_showcase['id'],
                                           'package_id': related_pkg_id})
                    except Exception as e:
                        print('There was a problem creating the showcase_package_association for "{0}": {1}'.format(
                            normalized_title, e))
Exemple #12
0
 def _gen_new_name(self, title):
     '''
     Turns a title into a URL friendly name: the munged title with
     underscores replaced by dashes and repeated dashes collapsed.
     '''
     slug = munge_title_to_name(title).replace('_', '-')
     previous = None
     # Keep halving dash runs until the slug stops changing.
     while slug != previous:
         previous = slug
         slug = slug.replace('--', '-')
     return slug
Exemple #13
0
 def _gen_new_title(self, title, related_id):
     """Return a title whose munged name does not clash with a package.

     When a package already uses the name derived from ``title``, the title
     is returned as ``duplicate_<title>_<related_id>``; otherwise it is
     returned unchanged.

     :param title: candidate title
     :param related_id: string appended to disambiguate a clashing title
     :returns: the title to use
     """
     name = munge_title_to_name(title)
     pkg_obj = model.Session.query(model.Package).filter_by(name=name).first()
     if pkg_obj:
         # str.replace returns a new string, so the result must be kept;
         # this stops the 'duplicate_' marker from piling up.
         title = title.replace('duplicate_', '')
         return 'duplicate_' + title + '_' + related_id
     else:
         return title
Exemple #14
0
 def create_organization_dict(self, inventory_id, title):
     """Build the organization dict for the given inventory id and title."""
     organization = {"name": munge_title_to_name(title)}
     organization["title"] = title
     organization["inventory_organization_id"] = inventory_id
     organization["is_organization"] = True
     organization["type"] = "organization"
     return organization
 def setup_class(cls):
     """Create one package with a 'spatial' extra per entry of fixtures_x."""
     SpatialTestBase.setup_class()
     for fixture_x in cls.fixtures_x:
         spatial_extra = {
             'key': 'spatial',
             'value': bbox_2_geojson(cls.x_values_to_bbox(fixture_x)),
         }
         label = str(fixture_x)
         cls.create_package(name=munge_title_to_name(label),
                            title=label,
                            extras=[spatial_extra])
    def import_stage(self, harvest_object):
        """Create or update the CKAN package described by a harvest object.

        The package dict is decoded from ``harvest_object.content``; its id
        is the harvest object's guid and its name is the munged
        ``layer_name``.  Groups, the owning organization and a
        ``license_url`` extra are filled in before the package and its term
        translations are saved.

        :param harvest_object: harvest object carrying JSON content
        :returns: True on success, False when no harvest object was received
        :raises Exception: any error is logged and re-raised
        """
        log.debug('In SwisstopoHarvester import_stage')

        if not harvest_object:
            log.error('No harvest object received')
            return False

        try:
            package_dict = json.loads(harvest_object.content)

            package_dict['id'] = harvest_object.guid
            package_dict['name'] = munge_title_to_name(
                package_dict['layer_name']
            )
            user = model.User.get(self.HARVEST_USER)
            context = {
                'model': model,
                'session': Session,
                'user': self.HARVEST_USER
                }

            # Find or create group the dataset should get assigned to
            package_dict['groups'] = self._find_or_create_groups(context)

            # Find or create the organization
            # the dataset should get assigned to
            package_dict['owner_org'] = self._find_or_create_organization(
                context,
                package_dict
            )

            # Save license url in extras
            extras = []
            if 'license_url' in package_dict:
                extras.append(('license_url', package_dict['license_url']))
            package_dict['extras'] = extras

            package = model.Package.get(package_dict['id'])
            # NOTE(review): the role object is built but never added to the
            # session here - presumably persisted implicitly; confirm.
            model.PackageRole(
                package=package,
                user=user,
                role=model.Role.ADMIN
            )

            log.debug(
                'Save or update package %s (%s)'
                % (package_dict['name'], package_dict['id'])
            )
            self._create_or_update_package(package_dict, harvest_object)

            log.debug('Save or update term translations')
            self._submit_term_translations(context, package_dict)
            Session.commit()

        except Exception as e:
            log.exception(e)
            raise

        # Report success explicitly instead of falling through with None.
        return True
Exemple #17
0
 def gen_new_name(title):
     """Return a dash-separated name for the title.

     When a package already uses the name, five characters of a random
     UUID are appended to it.
     """
     slug = munge_title_to_name(title).replace('_', '-')
     while '--' in slug:
         slug = slug.replace('--', '-')
     taken = Session.query(Package).filter(Package.name == slug).first()
     if not taken:
         return slug
     return slug + str(uuid.uuid4())[:5]
Exemple #18
0
 def gen_new_name(title):
     """Return a dash-separated name for the title.

     When a package already uses the name, five characters of a random
     UUID are appended to it.
     """
     candidate = munge_title_to_name(title).replace("_", "-")
     while "--" in candidate:
         candidate = candidate.replace("--", "-")
     existing = Session.query(Package).filter(Package.name == candidate).first()
     random_part = str(uuid.uuid4())[:5] if existing else None
     return candidate + random_part if existing else candidate
 def _find_or_create_organization(self, context):
     """Return the id of the configured organization, creating it if needed.

     The organization is described by ``self.ORGANIZATION[u'de']`` (name,
     description, website).  It is looked up by its munged name and created
     when the lookup fails.

     :param context: CKAN action context
     :returns: the id of the organization
     """
     # Build the payload outside the try block so a problem in the
     # configuration is reported as such rather than as an unbound
     # ``data_dict`` inside the handler.
     org_info = self.ORGANIZATION[u'de']
     org_name = munge_title_to_name(org_info['name'])
     data_dict = {
         'permission': 'edit_group',
         'id': org_name,
         'name': org_name,
         'title': org_info['name'],
         'description': org_info['description'],
         'extras': [
             {
                 'key': 'website',
                 'value': org_info['website']
             }
         ]
     }
     try:
         organization = get_action('organization_show')(context, data_dict)
     except Exception:
         # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
         # NOTE(review): presumably only a "not found" error should lead to
         # creation - narrow this once the exception class is confirmed.
         organization = get_action('organization_create')(context, data_dict)
     return organization['id']
Exemple #20
0
 def get_clean_name(s):
     """Return a munged name for ``s`` that ``_get_package`` does not find.

     While the candidate is found, ``s`` suffixed with ``_1``, ``_2``, ...
     is munged and tried next.
     """
     candidate = munge.munge_title_to_name(s)
     suffix = 1
     while _get_package(client, candidate):
         candidate = munge.munge_title_to_name("{0}_{1}".format(s, suffix))
         suffix += 1
     return candidate
Exemple #21
0
    def import_stage(self, harvest_object):
        '''
        The import stage will receive a HarvestObject object and will be
        responsible for:
            - performing any necessary action with the fetched object (e.g
              create a CKAN package).
              Note: if this stage creates or updates a package, a reference
              to the package must be added to the HarvestObject.
              Additionally, the HarvestObject must be flagged as current.
            - creating the HarvestObject - Package relation (if necessary)
            - creating and storing any suitable HarvestObjectErrors that may
              occur.
            - returning True if everything went as expected, False otherwise.

        The package id and name are both the munged guid of the harvest
        object, and the owner organization is copied from the harvest
        source dataset.

        :param harvest_object: HarvestObject object
        :returns: True if everything went right, False if errors were found
        '''
        # Guard first: reading ``harvest_object.guid`` for the debug message
        # would fail on a missing object before the check was reached.
        if not harvest_object:
            logger.error('No harvest object received')
            # NOTE(review): called without a harvest object argument, unlike
            # the call in the handler below - confirm the helper accepts it.
            self._save_object_error('No harvest object received')
            return False

        logger.debug("in import stage: %s", harvest_object.guid)

        try:
            self._set_config(harvest_object.job.source.config)
            context = {'model': model, 'session': Session, 'user': self.user}

            package_dict = json.loads(harvest_object.content)

            package_dict['id'] = munge_title_to_name(harvest_object.guid)
            package_dict['name'] = package_dict['id']

            # add owner_org
            source_dataset = get_action('package_show')(
                {
                    'ignore_auth': True
                }, {
                    'id': harvest_object.source.id
                })
            owner_org = source_dataset.get('owner_org')
            package_dict['owner_org'] = owner_org

            # logger.debug('Create/update package using dict: %s' % package_dict)
            self._create_or_update_package(package_dict, harvest_object,
                                           'package_show')

            Session.commit()

            logger.debug("Finished record")
        except Exception:
            # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
            logger.exception('Something went wrong!')
            self._save_object_error('Exception in import stage',
                                    harvest_object)
            return False
        return True
Exemple #22
0
    def create_slug(self):
        """Answer with the slug for the ``title`` request parameter.

        The response carries the dash-separated name and ``valid``, which is
        False when a package with the munged name already exists.
        """

        title = request.params.get('title') or ''
        name = munge_title_to_name(title)
        valid = not package_exists(name)
        #response.content_type = 'application/javascript'
        response_data = {'name': name.replace('_', '-'), 'valid': valid}
        return self._finish_ok(response_data)
 def set_extras(self, package, extras):
     """Distribute ``extras`` over the package dict.

     Core package fields are set directly, "tags" become a list of munged
     tag dicts, and everything else is stored through
     ``DatasetHarvesterBase.set_extra``.
     """
     package_fields = ("title", "notes", "author", "url")
     for key, value in extras.items():
         if key in package_fields:
             # these are CKAN package fields
             package[key] = value
             continue
         if key == "tags":
             # tags are special
             package["tags"] = [{"name": munge_title_to_name(tag)} for tag in value]
             continue
         # everything else is an "extra"
         DatasetHarvesterBase.set_extra(package, key, value)
 def _validate_package_id(self, package_id):
     """Return the munged package id, or False when it contains '<' or '>'."""
     # Angle brackets would allow HTML tags inside the id.
     if re.search('[<>]+', package_id) is None:
         return munge_title_to_name(package_id)

     log.debug(
         'Package id %s contains disallowed characters'
         % package_id
     )
     return False
 def set_extras(self, package, extras):
     """Distribute ``extras`` over the package dict.

     Core package fields are set directly, "tags" become a list of munged
     tag dicts, and everything else is stored through
     ``DatasetHarvesterBase.set_extra``.
     """
     for field, content in extras.items():
         if field == "tags":
             # tags are special
             tag_dicts = []
             for tag in content:
                 tag_dicts.append({"name": munge_title_to_name(tag)})
             package["tags"] = tag_dicts
         elif field in ("title", "notes", "author", "url"):
             # these are CKAN package fields
             package[field] = content
         else:
             # everything else is an "extra"
             DatasetHarvesterBase.set_extra(package, field, content)
Exemple #26
0
 def _validate_package_id(self, package_id):
     """Return the munged package id, or False when it contains '<' or '>'."""
     # Angle brackets would allow HTML tags inside the id.
     has_markup = bool(re.search('[<>]+', package_id))
     if not has_markup:
         return munge_title_to_name(package_id)

     message = 'Package id %s contains disallowed characters' % package_id
     log.debug(message)
     return False
Exemple #27
0
 def setup_class(cls):
     """Create one package with a 'spatial' extra per entry of fixtures_x."""
     SpatialTestBase.setup_class()
     for fixture_x in cls.fixtures_x:
         geojson = bbox_2_geojson(cls.x_values_to_bbox(fixture_x))
         title = str(fixture_x)
         extras = [{'key': 'spatial', 'value': geojson}]
         cls.create_package(name=munge_title_to_name(title),
                            title=title,
                            extras=extras)
Exemple #28
0
    def create_slug(self):
        """Answer with the slug for the ``title`` request parameter.

        The response carries the dash-separated name and ``valid``, which is
        False when a package with the munged name already exists.
        """

        requested_title = request.params.get('title') or ''
        slug = munge_title_to_name(requested_title)
        is_free = False if package_exists(slug) else True
        #response.content_type = 'application/javascript'
        return self._finish_ok(
            dict(name=slug.replace('_', '-'), valid=is_free))
 def _find_or_create_organization(self, context, package_dict):
     """Return the id of the organization that owns the package's layer.

     The owner key comes from ``self._find_owner`` applied to
     ``package_dict['layer_name']``; its German (``'de'``) entry in
     ``self.ORGANIZATION`` supplies name, description and website. The
     organization is looked up with ``organization_show`` and created with
     ``organization_create`` when that lookup fails.

     Raises:
         KeyError: if the owner or one of its ``'de'`` fields is missing
             from ``self.ORGANIZATION``.
     """
     owner = self._find_owner(package_dict['layer_name'])
     # Build the payload before the ``try``: previously a missing
     # ORGANIZATION entry fell into the handler with ``data_dict`` unbound
     # and was masked by an UnboundLocalError.
     org_info = self.ORGANIZATION[owner]['de']
     org_name = munge_title_to_name(org_info['name'])
     data_dict = {
         'permission': 'edit_group',
         'id': org_name,
         'name': org_name,
         'title': org_info['name'],
         'description': org_info['description'],
         'extras': [
             {
                 'key': 'website',
                 'value': org_info['website']
             }
         ]
     }
     try:
         org = get_action('organization_show')(context, data_dict)
     except Exception:
         # Any lookup failure is treated as "organization does not exist".
         # ``Exception`` (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate instead of triggering a create.
         org = get_action('organization_create')(context, data_dict)
     return org['id']
 def initial_data(self, spatial_clean_db):
     """Create one package with a ``spatial`` extra per ``self.fixtures_x`` entry."""
     for x_values in self.fixtures_x:
         geojson = bbox_2_geojson(self.x_values_to_bbox(x_values))
         label = six.text_type(x_values)
         create_package(
             name=munge_title_to_name(label),
             title=label,
             extras=[{"key": "spatial", "value": geojson}],
         )
Exemple #31
0
    def before_index(self, search_data):
        """Add flattened and per-language fields to a dataset's search document.

        Unsupported package types are returned untouched. Otherwise the JSON
        in ``search_data['validated_data_dict']`` is decoded and used to fill
        the resource fields (``res_name``, ``res_description``,
        ``res_format``, ``res_rights``), ``title_string``, ``description``,
        the organization's ``political_level`` when present, and for every
        language from ``get_langs()`` the ``title_*``, ``title_string_*``,
        ``description_*``, ``keywords_*`` and ``text_*`` fields.

        :param search_data: dict that is about to be indexed
        :returns: the same dict, updated in place
        """
        if not self.is_supported_package_type(search_data):
            return search_data

        extract_title = LangToString('title')
        validated_dict = json.loads(search_data['validated_data_dict'])
        resources = validated_dict[u'resources']

        search_data['res_name'] = [extract_title(r) for r in resources]
        search_data['res_description'] = [LangToString('description')(r) for r in resources]  # noqa
        search_data['res_format'] = self._prepare_formats_for_index(resources)
        search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in resources]  # noqa
        search_data['title_string'] = extract_title(validated_dict)
        search_data['description'] = LangToString('description')(validated_dict)  # noqa
        organization = validated_dict[u'organization']
        if 'political_level' in organization:
            search_data['political_level'] = organization[u'political_level']

        try:
            # index language-specific values (or their fallback)
            text_field_items = {}
            for lang_code in get_langs():
                search_data['title_' + lang_code] = get_localized_value(
                    validated_dict['title'],
                    lang_code
                )
                search_data['title_string_' + lang_code] = munge_title_to_name(
                    get_localized_value(validated_dict['title'], lang_code)
                )
                search_data['description_' + lang_code] = get_localized_value(
                    validated_dict['description'],
                    lang_code
                )
                search_data['keywords_' + lang_code] = get_localized_value(
                    validated_dict['keywords'],
                    lang_code
                )

                text_parts = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
                text_parts.extend(search_data['keywords_' + lang_code])
                text_parts.extend(r['title'][lang_code] for r in resources if r['title'][lang_code])  # noqa
                text_parts.extend(r['description'][lang_code] for r in resources if r['description'][lang_code])  # noqa
                text_field_items['text_' + lang_code] = text_parts

            # flatten values for text_* fields; ``items()`` works on both
            # Python 2 and 3, unlike the former ``iteritems()``
            for key, value in text_field_items.items():
                search_data[key] = ' '.join(value)

        except KeyError:
            # Best effort: a dataset missing one of the multilingual keys is
            # indexed with whatever fields were set before the lookup failed.
            pass

        return search_data
 def _find_or_create_organization(self, package_dict, context):
     """Set ``package_dict['owner_org']`` to the harvester's organization id.

     The organization is identified by the munged ``self.ORGANIZATION['de']``
     title. It is looked up with ``organization_show``; when that fails it
     is created with ``organization_create``. Each action gets its own copy
     of ``context``.
     """
     title = self.ORGANIZATION['de']
     org_name = munge_title_to_name(title)
     try:
         owner_id = get_action('organization_show')(
             context.copy(),
             {'id': org_name}
         )['id']
     except Exception:
         # Treat any lookup failure as "not found". ``Exception`` instead of
         # a bare except keeps KeyboardInterrupt/SystemExit from being
         # swallowed and answered with a create call.
         owner_id = get_action('organization_create')(
             context.copy(),
             {
                 'permission': 'edit_group',
                 'id': org_name,
                 'name': org_name,
                 'title': title
             }
         )['id']
     package_dict['owner_org'] = owner_id
Exemple #33
0
 def _find_or_create_organization(self, package_dict, context):
     """Assign the harvester's organization to ``package_dict['owner_org']``.

     Looks the organization up by the munged ``self.ORGANIZATION['de']``
     title via ``organization_show`` and falls back to
     ``organization_create`` when the lookup fails. ``context`` is copied
     for every action call.
     """
     org_title = self.ORGANIZATION['de']
     org_slug = munge_title_to_name(org_title)
     show = get_action('organization_show')
     try:
         package_dict['owner_org'] = show(
             context.copy(), {'id': org_slug})['id']
     except Exception:
         # ``Exception`` rather than a bare except: interpreter-exit
         # signals (KeyboardInterrupt, SystemExit) must not end up creating
         # an organization.
         organization = get_action('organization_create')(
             context.copy(),
             {
                 'permission': 'edit_group',
                 'id': org_slug,
                 'name': org_slug,
                 'title': org_title
             }
         )
         package_dict['owner_org'] = organization['id']
Exemple #34
0
 def _find_or_create_organization(self, context):
     """Return the id of the harvester's organization, creating it if needed.

     Name, description and website are read from ``self.ORGANIZATION['de']``.
     ``organization_show`` is tried first; when it fails the organization is
     created with ``organization_create`` using the same payload.

     Raises:
         KeyError: if ``self.ORGANIZATION['de']`` lacks a required field.
     """
     org_info = self.ORGANIZATION[u'de']
     org_name = munge_title_to_name(org_info['name'])
     # The payload is built outside the ``try`` so that a missing field is
     # reported as a KeyError rather than an UnboundLocalError raised from
     # the handler referencing ``data_dict``.
     data_dict = {
         'permission': 'edit_group',
         'id': org_name,
         'name': org_name,
         'title': org_info['name'],
         'description': org_info['description'],
         'extras': [{
             'key': 'website',
             'value': org_info['website']
         }]
     }
     try:
         organization = get_action('organization_show')(context, data_dict)
     except Exception:
         # Any lookup failure counts as "not found"; a bare except would
         # additionally trap KeyboardInterrupt/SystemExit.
         organization = get_action('organization_create')(context,
                                                          data_dict)
     return organization['id']
Exemple #35
0
 def _dropzone_get_groups(self, dataset_node):
     '''
     Resolve the node's ``kategorie`` value to group ids.

     The value is split on ``', '``; each title is paired with its munged
     name and the pairs are passed to ``self._get_group_ids``. An empty or
     missing value yields an empty list.
     '''
     categories = self._get(dataset_node, 'kategorie')
     if not categories:
         return []

     groups = [
         (munge_title_to_name(title), title)
         for title in categories.split(', ')
     ]
     return self._get_group_ids(groups)
 def _dropzone_get_groups(self, dataset_node):
     '''
     Return the group ids for the categories listed on the dataset node.

     ``kategorie`` holds titles separated by ``', '``. Every title is
     normalized with ``munge_title_to_name`` and the ``(name, title)``
     pairs are looked up through ``self._get_group_ids``.
     '''
     raw_categories = self._get(dataset_node, 'kategorie')
     if raw_categories:
         name_title_pairs = []
         for group_title in raw_categories.split(', '):
             pair = (munge_title_to_name(group_title), group_title)
             name_title_pairs.append(pair)
         return self._get_group_ids(name_title_pairs)
     return []
Exemple #37
0
    def before_index(self, search_data):
        """Enrich a dataset's search document with multilingual fields.

        Documents of unsupported package types are returned unchanged. For
        the rest, ``search_data['validated_data_dict']`` (JSON) is decoded
        and used to set the resource fields (``res_name``,
        ``res_description``, ``res_format``, ``res_rights``),
        ``title_string``, ``description``, the organization's
        ``political_level`` when present, and one ``title_*``,
        ``title_string_*``, ``description_*``, ``keywords_*`` and ``text_*``
        field per language returned by ``get_langs()``.

        :param search_data: dict that is about to be indexed
        :returns: the same dict, updated in place
        """
        if not self.is_supported_package_type(search_data):
            return search_data

        extract_title = LangToString('title')
        validated_dict = json.loads(search_data['validated_data_dict'])
        resources = validated_dict[u'resources']

        search_data['res_name'] = [extract_title(r) for r in resources]
        search_data['res_description'] = [LangToString('description')(r) for r in resources]  # noqa
        search_data['res_format'] = self._prepare_formats_for_index(resources)
        search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in resources]  # noqa
        search_data['title_string'] = extract_title(validated_dict)
        search_data['description'] = LangToString('description')(validated_dict)  # noqa
        organization = validated_dict[u'organization']
        if 'political_level' in organization:
            search_data['political_level'] = organization[u'political_level']

        try:
            # index language-specific values (or their fallback)
            text_field_items = {}
            for lang_code in get_langs():
                search_data['title_' + lang_code] = get_localized_value(
                    validated_dict['title'],
                    lang_code
                )
                search_data['title_string_' + lang_code] = munge_title_to_name(
                    get_localized_value(validated_dict['title'], lang_code)
                )
                search_data['description_' + lang_code] = get_localized_value(
                    validated_dict['description'],
                    lang_code
                )
                search_data['keywords_' + lang_code] = get_localized_value(
                    validated_dict['keywords'],
                    lang_code
                )

                text_parts = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
                text_parts.extend(search_data['keywords_' + lang_code])
                text_parts.extend(r['title'][lang_code] for r in resources if r['title'][lang_code])  # noqa
                text_parts.extend(r['description'][lang_code] for r in resources if r['description'][lang_code])  # noqa
                text_field_items['text_' + lang_code] = text_parts

            # flatten values for text_* fields; ``items()`` replaces the
            # Python-2-only ``iteritems()`` and works on both versions
            for key, value in text_field_items.items():
                search_data[key] = ' '.join(value)

        except KeyError:
            # Best effort: when a multilingual key is missing the document
            # keeps the fields that were set before the failed lookup.
            pass

        return search_data
    def make_package_name(self, title, exclude_existing_package):
        '''
        Build a URL friendly package name from a title.

        Underscores become hyphens and runs of hyphens are collapsed. If a
        package other than ``exclude_existing_package`` already uses the
        name, the first five characters of a random UUID are appended.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        while '--' in name:
            name = name.replace('--', '-')

        clash = (
            Session.query(Package)
            .filter(Package.name == name)
            .filter(Package.id != exclude_existing_package)
            .first()
        )
        if not clash:
            return name
        return name + str(uuid.uuid4())[:5]
Exemple #39
0
 def _find_or_create_groups(self, context):
     """Return a one-element list with the id of the first German group.

     The group named by ``self.GROUPS['de'][0]`` is looked up with
     ``group_show``; when that fails it is created with ``group_create``
     and the creation is logged.
     """
     group_name = self.GROUPS['de'][0]
     data_dict = {
         'id': group_name,
         'name': munge_title_to_name(group_name),
         'title': group_name
     }
     try:
         group = get_action('group_show')(context, data_dict)
     except Exception:
         # Any lookup failure is treated as "group missing". ``Exception``
         # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
         group = get_action('group_create')(context, data_dict)
         log.info('created the group ' + group['id'])
     return [group['id']]
Exemple #40
0
 def _find_or_create_groups(self, context):
     """Look up (or create) the first German group and return ``[group_id]``.

     ``self.GROUPS['de'][0]`` is used as id and title, its munged form as
     name. ``group_show`` is tried first; on failure ``group_create`` is
     called and the new group's id is logged.
     """
     title = self.GROUPS['de'][0]
     data_dict = {
         'id': title,
         'name': munge_title_to_name(title),
         'title': title
     }
     try:
         group = get_action('group_show')(context, data_dict)
     except Exception:
         # Narrowed from a bare except so that KeyboardInterrupt and
         # SystemExit are not answered with a create call.
         group = get_action('group_create')(context, data_dict)
         log.info('created the group ' + group['id'])
     return [group['id']]
    def _find_or_create_groups(self, groups, context):
        """Return the ids of the named groups, creating the ones not found.

        :param groups: iterable of group titles
        :param context: action context passed to ``group_show`` /
            ``group_create``
        :returns: list of group ids in the order of ``groups``
        """
        log.debug("Group names: %s" % groups)
        group_ids = []
        for group_name in groups:
            data_dict = {"id": group_name, "name": munge_title_to_name(group_name), "title": group_name}
            try:
                group = get_action("group_show")(context, data_dict)
                log.info("found the group " + group["id"])
            except Exception:
                # Any lookup failure is treated as "group missing";
                # ``Exception`` instead of a bare except keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                group = get_action("group_create")(context, data_dict)
                log.info("created the group " + group["id"])
            group_ids.append(group["id"])

        log.debug("Group ids: %s" % group_ids)
        return group_ids
Exemple #42
0
    def _gen_new_name(self, title):
        '''
        Build a URL friendly package name from a title.

        Underscores become hyphens and repeated hyphens are collapsed. When
        a package with that name already exists, the first five characters
        of a random UUID are appended.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        while '--' in name:
            name = name.replace('--', '-')

        existing = Session.query(Package).filter(Package.name == name).first()
        if not existing:
            return name
        return name + str(uuid.uuid4())[:5]
Exemple #43
0
    def make_package_name(self, title, exclude_existing_package, for_deletion):
        '''
        Build a URL friendly package name from a title.

        The munged title gets hyphens instead of underscores, an optional
        ``deleted-`` prefix, collapsed hyphen runs and is cut to 90
        characters. If another package (id different from
        ``exclude_existing_package``) already uses the name, a hyphen and
        five characters of a random UUID are appended.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        if for_deletion:
            name = "deleted-" + name
        while '--' in name:
            name = name.replace('--', '-')
        name = name[:90]  # max length is 100

        clash = (
            Session.query(Package)
            .filter(Package.name == name)
            .filter(Package.id != exclude_existing_package)
            .first()
        )
        if not clash:
            return name
        return name + "-" + str(uuid.uuid4())[:5]
Exemple #44
0
 def gen_new_name(self, title):
     """Derive an unused package name from ``title``.

     The munged title (hyphens instead of underscores, hyphen runs
     collapsed) is returned when it is not among the first 100 package
     names starting with it. Otherwise the suffixes ``1`` to ``100`` are
     tried in order; ``None`` is returned when all of them are taken.
     """
     name = munge_title_to_name(title).replace('_', '-')
     while '--' in name:
         name = name.replace('--', '-')

     prefix_pattern = u'%s%%' % name
     pkg_query = Session.query(Package).filter(
         Package.name.ilike(prefix_pattern)).limit(100)
     taken = [pkg.name for pkg in pkg_query]
     if name not in taken:
         return name

     for counter in range(1, 101):
         candidate = name + six.text_type(counter)
         if candidate not in taken:
             return candidate
     return None
Exemple #45
0
    def improve_pkg_dict(self, pkg_dict, params):
        """Normalize name, url, id and license of a package dict in place.

        * ``name``: munged (underscores replaced by hyphens) when non-empty,
          otherwise derived from ``title``.
        * ``url``: removed when it is the empty string.
        * ``id``: overwritten with the final ``name``.
        * ``license_id``: ``params['license']`` when given, otherwise the
          ``ckanext.ddi.default_license`` config value.

        :param pkg_dict: package dict with at least ``name``, ``title`` and
            ``url`` keys
        :param params: optional mapping of import parameters, may be None
        :returns: the updated ``pkg_dict``
        """
        if pkg_dict['name'] != '':
            pkg_dict['name'] = munge_name(pkg_dict['name']).replace('_', '-')
        else:
            pkg_dict['name'] = munge_title_to_name(pkg_dict['title'])
        if pkg_dict['url'] == '':
            pkg_dict.pop('url', None)

        # override the 'id' as this never matches the CKAN internal ID
        pkg_dict['id'] = pkg_dict['name']

        # Look up the string key 'license'. The previous code passed the bare
        # name ``license`` (the interpreter's builtin) as the key, so a
        # license supplied in ``params`` was never picked up.
        if params is not None and params.get('license') is not None:
            pkg_dict['license_id'] = params['license']
        else:
            pkg_dict['license_id'] = config.get('ckanext.ddi.default_license')

        return pkg_dict
def generate_name(data_dict):
    '''Build a package name from the title and the first part of the guid.

    ``data_dict`` must provide ``iso_values`` (with ``guid``) and
    ``package_dict`` (with ``title``). The result has the form
    ``<munged-title>-<guid part before the first hyphen>``.'''

    iso_values = data_dict['iso_values']
    package_dict = data_dict['package_dict']

    slug = re.sub('-+', '-', munge_title_to_name(package_dict['title']))
    # leave room for "-<guid_part>" within the allowed name length of 100:
    # (100-len(guid_part)-1)
    slug = slug[:91].strip('-')

    guid_part = iso_values['guid'].split('-')[0]
    return "{0}-{1}".format(slug, guid_part)
    def process(self, record):
        """Create or update a ``publications`` package from one record.

        The record's ``ID``, ``title``, ``abstract``, ``author`` and
        ``keywords`` fields are required; ``doi``, ``isbn``, ``pmid``,
        ``editor``, ``publisher``, ``language`` and ``url`` are used when
        present. ``package_update`` is called when a package with the
        record's ID already exists, ``package_create`` otherwise, both as
        the site user with authorization checks disabled.
        """
        data_dict = {
            'id': record['ID'],
            'title': record['title'].strip('{}'),
            'name': munge_title_to_name(record['ID'] + record['title']),
            'notes': record['abstract'],
            'harvest_source': 'MENDELEY',
            'creator': record['author'].replace(',', '').split(' and '),
            'tag_string': ','.join(
                munge_tag(tag) for tag in record['keywords'].split(',')),
            'owner_org': tk.config.get(
                'ckanext.ingestor.config.mendeley_bib.owner_org', 'iaea'),
            'type': 'publications'
        }

        # identifiers are stored as "<scheme>:<value>"
        data_dict['identifier'] = [
            scheme + ':' + record[scheme]
            for scheme in ('doi', 'isbn', 'pmid')
            if scheme in record
        ]

        # optional fields that are stored as single-element lists
        optional_fields = (
            ('editor', 'contributor'),
            ('publisher', 'publisher'),
            ('language', 'language'),
        )
        for record_key, package_key in optional_fields:
            if record_key in record:
                data_dict[package_key] = [record[record_key]]

        data_dict['source'] = record.get('url')

        user = tk.get_action('get_site_user')({'ignore_auth': True})
        if model.Package.get(data_dict['id']):
            action_name = 'package_update'
        else:
            action_name = 'package_create'
        action = tk.get_action(action_name)
        action({'ignore_auth': True, 'user': user['name']}, data_dict)
    def get_harvested_package_dict(cls, harvest_object):
        """Return the harvested DKAN package dict reshaped for CKAN.

        Starting from ``CKANHarvester.get_harvested_package_dict`` the dict
        gets default ``extras`` and ``name``, ``notes`` copied from
        ``description``, a ``license_id`` matched by license title
        (``'notspecified'`` when nothing matches), cleaned-up resources and
        ``private`` forced to False. On any error an object error is saved
        and ``None`` is returned.
        """
        package = CKANHarvester.get_harvested_package_dict(harvest_object)
        # change the DKAN-isms into CKAN-style
        try:
            if 'extras' not in package:
                package['extras'] = {}

            if 'name' not in package:
                package['name'] = munge.munge_title_to_name(package['title'])

            if 'description' in package:
                package['notes'] = package['description']

            # first registered license with a matching title wins
            package['license_id'] = next(
                (known.id
                 for known in model.Package.get_license_register().values()
                 if known.title == package['license_title']),
                'notspecified')

            if 'resources' not in package:
                raise PackageDictError('Dataset has no resources')
            for resource in package['resources']:
                resource['description'] = resource['title']
                resource.pop('revision_id', None)
                if 'format' not in resource:
                    mimetype = resource.get('mimetype')
                    resource['format'] = MIMETYPE_FORMATS.get(mimetype, '')

            if 'private' in package:
                # DKAN appears to have datasets with private=True which are
                # still public: https://github.com/NuCivic/dkan/issues/950.
                # Truly private datasets would not be accessible to the
                # harvester, so assume they are not private.
                package['private'] = False

            return package
        except (Exception) as e:
            cls._save_object_error(
                'Unable to get convert DKAN to CKAN package: %s' % e,
                harvest_object)
            return None
    def _find_or_create_entity(self, entityType, entityNames, context):
        """Return the ids of the named entities, creating the missing ones.

        :param entityType: prefix of the show action, e.g. the ``group`` in
            ``group_show``
        :param entityNames: iterable of entity titles
        :param context: action context
        :returns: list of entity ids in the order of ``entityNames``
        """
        log.debug(entityType + ' names: %s' % entityNames)
        show_action_name = entityType + '_show'
        entity_ids = []
        for title in entityNames:
            data_dict = {
                'id': self._utf8_and_remove_diacritics(title),
                'name': munge_title_to_name(title),
                'title': title
            }
            try:
                entity = get_action(show_action_name)(context, data_dict)
                log.info('found the ' + entityType + ' with id' + entity['id'])
            except Exception:
                # lookup failed: create the entity instead
                entity = self._create_entity(entityType, data_dict, context)

            entity_ids.append(entity['id'])

            # logged once per entity with the ids collected so far
            log.debug(entityType + ' ids: %s' % entity_ids)
        return entity_ids
Exemple #50
0
def command(config_ini, nodepublisher_csv):
    """Bootstrap CKAN from ``config_ini`` and run the publisher migration.

    :param config_ini: path to the CKAN ini file, used for both the
        application config and the logging config
    :param nodepublisher_csv: path to a CSV file whose rows are
        ``nid, title`` pairs

    Fills the ``publishers`` mapping (node id -> munged publisher title),
    aliases known duplicate publishers, then calls ``update_datasets()`` and
    ``generate_harvest_publishers()``.
    NOTE(review): ``publishers``, ``warnings``, ``translator`` and
    ``Registry`` are not defined in this function - presumably module-level
    names; confirm.
    """
    config_ini_filepath = os.path.abspath(config_ini)
    load_config(config_ini_filepath)
    engine = engine_from_config(config,'sqlalchemy.')

    # imported only after load_config() has run
    from ckan import model
    from ckan.lib.munge import munge_title_to_name

    logging.config.fileConfig(config_ini_filepath)
    log = logging.getLogger(os.path.basename(__file__))
    # publish the logger through the module-level ``global_log``
    global global_log
    global_log = log

    model.init_model(engine)

    # Register a translator in this thread so that
    # the _() functions in logic layer can work
    from ckan.lib.cli import MockTranslator
    registry=Registry()
    registry.prepare()
    translator_obj=MockTranslator() 
    registry.register(translator, translator_obj) 

    model.repo.new_revision()

    log.info('Reading %s', nodepublisher_csv)
    with open(nodepublisher_csv, 'rU') as f:
        reader = csv.reader( f)
        for row in reader:
            # every row must have exactly two columns: node id and title
            nid, title = row
            publishers[ int(nid) ] = munge_title_to_name(title)
    # Mappings where we are getting rid of duplicate publishers
    # (the left-hand node id is pointed at the right-hand publisher's name)
    publishers[16268] = publishers[11408] # UKSA -> ONS
    publishers[11606] = publishers[11408] # ONS
    publishers[20054] = publishers[16248] # Met Office
    publishers[33036] = publishers[15255] # Windsor & Maidenhead
    publishers[32619] = publishers[33245] # Monmouthshire
    publishers[12662] = publishers[11567] # NHS

    update_datasets()
    generate_harvest_publishers()

    log.info('Warnings: %r', warnings)
Exemple #51
0
    def _find_or_create_groups(self, groups, context):
        """Return the ids of the named groups, creating the ones not found.

        Every action call receives its own copy of ``context``.

        :param groups: iterable of group titles
        :param context: action context
        :returns: list of group ids in the order of ``groups``
        """
        log.debug('Group names: %s' % groups)
        group_ids = []
        for group_name in groups:
            data_dict = {
                'id': group_name,
                'name': munge_title_to_name(group_name),
                'title': group_name
            }
            try:
                group = get_action('group_show')(context.copy(), data_dict)
                log.info('found the group ' + group['id'])
            except Exception:
                # Any lookup failure is treated as "group missing".
                # ``Exception`` (instead of a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                group = get_action('group_create')(context.copy(), data_dict)
                log.info('created the group ' + group['id'])
            group_ids.append(group['id'])

        log.debug('Group ids: %s' % group_ids)
        return group_ids
Exemple #52
0
    def test_organization_import_update(self):
        """ Test updating organization import from file """
        organization_url = tools.get_organization_test_source()
        expected_titles = (
            u"Kainuun ty\u00f6- ja elinkeinotoimisto",
            u"Lapin ty\u00f6- ja elinkeinotoimisto",
            u"Suomen ymp\u00e4rist\u00f6keskus",
        )

        # first import without, then with the public_organization flag
        for with_flag in (False, True):
            data = {'url': organization_url}
            if with_flag:
                data['public_organization'] = True
            result = organization_import.apply((simplejson.dumps(data), ))
            self.assert_true(result.successful())

            for title in expected_titles:
                organization = tests.call_action_api(
                    self.app,
                    'organization_show',
                    id=munge_title_to_name(title).lower())
                self.assert_equal(organization['title'], title)
                # We do not want this to be updated
                self.assert_true(
                    'public_adminstration_organization' not in organization)
Exemple #53
0
    def validator(key, data, errors, context):
        """Fill an empty ``data[key]`` with a name munged from a title extra.

        Nothing happens when the field already has errors or a truthy
        value. Otherwise the title is looked up in the sibling
        ``__extras`` entry under ``<title field>-<locale>``, trying the
        field's ``autogeneration_locale`` first and the
        ``ckan.locale_default`` setting (default ``'es'``) second; the first
        non-empty title found is munged into ``data[key]``.
        """
        if errors[key]:
            return

        value = data[key]
        if value is not missing and value:
            return

        title_field = field['autogeneration_field'] or DEFAULT_TITLE_FIELD

        log.debug('[csc_multilanguage_url] Creating field using the field %s',
                  title_field)

        prefix = title_field + '-'
        extras = data.get(key[:-1] + ('__extras', ), {})

        # candidate locales in priority order, empty ones skipped
        locales = [
            candidate
            for candidate in (field['autogeneration_locale'],
                              config.get('ckan.locale_default', 'es'))
            if candidate
        ]

        for locale_code in locales:
            title_key = prefix + locale_code
            if title_key in extras and extras[title_key]:
                data[key] = munge.munge_title_to_name(extras[title_key])
                log.debug(
                    '[csc_multilanguage_url] Created name "%s" for package from language %s',
                    data[key], locale_code)
                break
        return
    def set_dataset_info(self, package, dataset, dataset_defaults):
        """Copy one source dataset record into the CKAN package dict.

        Fixed agency values are set first, then ``title``/``notes``/``url``
        from the record, then extras taken from ``dataset["HealthData"]``,
        and finally ``tags`` built from the munged keyword names.
        ``dataset_defaults`` is not used here.
        """
        extra(package, "Agency", "Department of Health & Human Services")
        package["author"] = "Centers for Medicare & Medicaid Services"
        extra(package, "author_id", "http://healthdata.gov/id/agency/cms")
        extra(package, "Bureau Code", "009:38")
        package["title"] = dataset["Name"].strip()
        package["notes"] = dataset.get("Description")

        package["url"] = dataset.get("Address")

        health_data = dataset["HealthData"]

        for label, source_key in (("Date Released", "DateReleased"),
                                  ("Date Updated", "DateUpdated")):
            extra(package, label, parsedate(health_data.get(source_key)))

        extra(package, "Agency Program URL",
              health_data.get("AgencyProgramURL"))
        extra(package, "Subject Area 1", "Medicare")

        # NOTE(review): "Coverage Period" and "Collection Frequency" are
        # looked up with a space in the source key, unlike the other keys -
        # confirm against the feed.
        for label, source_key in (
                ("Unit of Analysis", "UnitOfAnalysis"),
                ("Data Dictionary", "DataDictionaryURL"),
                ("Coverage Period", "Coverage Period"),
                ("Collection Frequency", "Collection Frequency"),
                ("Geographic Scope", "GeographicScope")):
            extra(package, label, health_data.get(source_key))

        # Prefer the generic contact; `or` falls back to the personal one
        # when the generic value is None or the empty string.
        for label, source_key in (("Contact Name", "ContactName"),
                                  ("Contact Email", "ContactEmail")):
            contact = (health_data.get("Generic" + source_key, None)
                       or health_data.get(source_key))
            extra(package, label, contact)

        extra(package, "License Agreement",
              health_data.get("DataLicenseAgreementURL"))

        from ckan.lib.munge import munge_title_to_name
        package["tags"] = [
            {"name": munge_title_to_name(keyword["Name"])}
            for keyword in dataset.get("Keywords", [])
        ]
Exemple #55
0
 def test_organization_import(self):
     """ Test organization import """
     organization_url = tools.get_organization_test_source()
     data = simplejson.dumps({
         'url': organization_url,
         'public_organization': True
     })
     expected_titles = (
         u"Kainuun ty\u00f6- ja elinkeinotoimisto",
         u"Lapin ty\u00f6- ja elinkeinotoimisto",
         u"Suomen ymp\u00e4rist\u00f6keskus",
     )
     # The import is run twice with the same payload. ``range`` replaces
     # the Python-2-only ``xrange`` and behaves the same on both versions.
     for _ in range(2):
         result = organization_import.apply((data, ))
         self.assert_true(result.successful())
         for title in expected_titles:
             organization = tests.call_action_api(
                 self.app,
                 'organization_show',
                 id=munge_title_to_name(title).lower())
             self.assert_equal(organization['title'], title)
             # the flag must have been stored as an extra
             public_org = 'false'
             for extra in organization['extras']:
                 if extra['key'] == 'public_adminstration_organization':
                     public_org = 'true'
             self.assert_equal(public_org, 'true')
Exemple #56
0
    def fetch_stage(self, harvest_object):
        '''
        Add ``id`` and ``name`` to the harvested record and save it back.

        The object's JSON content is decoded, ``id`` is copied from
        ``identifier`` and ``name`` is the munged title followed by the id,
        cut to 99 characters. The updated record is re-serialized into
        ``harvest_object.content`` and the object is saved. Failures are
        logged and stored as object errors.

        :param harvest_object: HarvestObject object
        :returns: True if everything went right, False if errors were found
        '''
        logger.debug("in fetch stage: %s" % harvest_object.guid)
        try:
            self._set_config(harvest_object.job.source.config)
            record = json.loads(harvest_object.content)
            record['id'] = record['identifier']

            full_name = munge_title_to_name(record['title']) + record['id']
            record['name'] = full_name[:99]

            try:
                serialized = json.dumps(record)
            except Exception:
                logger.exception('Dumping the metadata failed!')
                self._save_object_error('Dumping the metadata failed!',
                                        harvest_object)
                return False

            harvest_object.content = serialized
            harvest_object.save()
        except Exception:
            logger.exception('Something went wrong!')
            self._save_object_error('Exception in fetch stage', harvest_object)
            return False

        return True
Exemple #57
0
    def get_initial_package(self, user_obj):
        """ Build the initial pkg_dict from the kobo asset.

            The asset ``self.kobo_asset_id`` is fetched through the API
            object returned by ``self.get_kobo_api(user_obj)``; errors
            raised by those calls propagate to the caller.
            Return a pkg_dict with title, munged name, notes, original_id
            and the kobo_* extras. """

        kobo_api = self.get_kobo_api(user_obj)
        asset = kobo_api.get_asset(self.kobo_asset_id)

        pkg = {
            'title': asset['name'],
            'name': munge_title_to_name(asset['name']),
            'notes': self._build_asset_notes(asset),
            'original_id': asset['uid'],
        }

        settings = asset['settings']
        extra_values = (
            ('kobo_asset_id', self.kobo_asset_id),
            ('kobo_owner', asset['owner__username']),
            ('kobo_sector', settings.get('sector')),
            ('kobo_country', settings.get('country')),
        )
        pkg['extras'] = [
            {'key': key, 'value': value} for key, value in extra_values
        ]

        return pkg
    def make_package_name(self, title, exclude_existing_package, for_deletion):
        '''
        Build a URL friendly package name from a title.

        The munged title gets hyphens instead of underscores, an optional
        ``deleted-`` prefix, collapsed hyphen runs and is cut to 90
        characters. If another package already uses that name, the current
        name of ``exclude_existing_package`` is reused when that package
        exists; otherwise a hyphen and five random characters are appended.
        '''

        name = munge_title_to_name(title).replace('_', '-')
        if for_deletion:
            name = "deleted-" + name
        while '--' in name:
            name = name.replace('--', '-')
        name = name[:90]  # max length is 100

        # Is this slug already used by a package other than the one being
        # updated?
        name_owner = Session.query(Package).filter(Package.name == name).filter(
            Package.id != exclude_existing_package).first()
        if name_owner is None:
            # The name is available, so use it. When updating an existing
            # package this changes its URL, so incoming links may break.
            return name

        if exclude_existing_package:
            # The name is taken and we are updating a package. Its current
            # name probably already carries a random suffix from last time;
            # reuse it to prevent spurious URL changes.
            current = Session.query(Package).filter(
                Package.id == exclude_existing_package).first()
            # the package may not exist yet: the desired package GUID can
            # be passed in before the package is instantiated
            if current:
                return current.name

        # Append some random text to the URL, hoping that five characters
        # are enough to avoid a collision.
        return name + "-" + str(uuid.uuid4())[:5]
Exemple #59
0
    def gather_stage(self, harvest_job):
        '''
        Creates and saves one HarvestObject per entry of self.DATASETS

        For every dataset the CSW metadata is fetched by id, its name and
        service type are normalised, hardcoded values (url, tags, license),
        term translations and resources are added, and the result is stored
        as JSON in the content of a new HarvestObject.

        :param harvest_job: job the created harvest objects are attached to
        :returns: list with the ids of the saved harvest objects
        '''
        log.debug('In ZhGisHarvester gather_stage')

        ids = []
        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3 dicts, while .items() works on both Python 2 and 3.
        for dataset_id, dataset in self.DATASETS.items():
            csw = ckan_csw.ZhGisCkanMetadata()
            # NOTE(review): if this is a plain dict, .copy() is shallow, so
            # the 'tags' list extended below would still be shared with the
            # object returned by get_ckan_metadata_by_id - confirm that this
            # is not a cached value.
            metadata = csw.get_ckan_metadata_by_id(dataset_id).copy()
            log.debug(metadata)

            # Fix metadata information
            metadata['name'] = munge_title_to_name(metadata['name'])
            metadata['service_type'] = (metadata['service_type'].replace(
                'OGC:', ''))

            # Enrich metadata with hardcoded values
            metadata['url'] = dataset['geolion_url']
            metadata['tags'].extend(dataset['tags'])

            metadata['translations'] = self._generate_term_translations()
            # Pass values as logger arguments so formatting only happens when
            # debug logging is enabled.
            log.debug("Translations: %s", metadata['translations'])

            metadata['resources'] = (
                self._generate_resource_dict_array(metadata))
            log.debug(metadata['resources'])

            metadata['license_id'] = self.LICENSE['name']
            metadata['license_url'] = self.LICENSE['url']

            obj = HarvestObject(guid=metadata['id'],
                                job=harvest_job,
                                content=json.dumps(metadata))
            obj.save()
            log.debug('adding %s to the queue', metadata['name'])
            ids.append(obj.id)

        return ids
    def run_create(self, context, data_dict, resources_sheet, archive):
        """Dataset creating proccess."""
        data_dict['name'] = munge_title_to_name(data_dict['title'])
        try:
            package_id_or_name_exists(data_dict['name'], context)
        except Invalid:
            pass
        else:
            counter = 0

            while True:
                name = '{0}-{1}'.format(data_dict['name'], counter)
                try:
                    package_id_or_name_exists(name, context)
                except Invalid:
                    data_dict['name'] = name
                    break
                counter += 1

        result = self.create_dataset(context, data_dict, resources_sheet,
                                     archive)

        if result:
            h.flash_success('Dataset was created!')