def _create_or_update_organization(self, data_dict, harvest_job):
    """Create or update a CKAN organization from harvested data.

    :param data_dict: harvested organization data; must contain 'id',
        'name' and 'changed' keys.
    :param harvest_job: current harvest job, used to find the last
        finished job so unchanged organizations are not re-updated.
    :returns: the organization dict as returned by the action API.
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': self._get_user_name(),
        'ignore_auth': True,
    }
    try:
        log.info("Finding organization..")
        log.info(data_dict['id'])
        org = p.toolkit.get_action('organization_show')(
            context, {'id': data_dict['id']})
        # BUG FIX: was log.info("found", org) — an invalid logging call
        # (an extra argument with no placeholder in the format string).
        log.info("found %s", org)
        last_finished_job = self._last_finished_job(harvest_job)
        log.info(last_finished_job)
        # Only update when the remote record changed after our last job.
        if last_finished_job and last_finished_job < data_dict['changed']:
            log.info("updating organization")
            org = p.toolkit.get_action('organization_update')(
                context,
                {'title': data_dict['name'],
                 'name': munge_title_to_name(data_dict['name']),
                 'id': data_dict['id']})
    except NotFound:
        log.info("Organization %s not found, creating...", data_dict['name'])
        # Get rid of auth audit on the context otherwise we'll get an
        # exception
        context.pop('__auth_audit', None)
        org = p.toolkit.get_action('organization_create')(
            context,
            {'title': data_dict['name'],
             'name': munge_title_to_name(data_dict['name']),
             'id': data_dict['id']})
    log.info(org)
    return org
def do_publisher(cls, publisher_nid):
    """Ensure the publisher's Drupal parent relationship exists in CKAN.

    Looks up the publisher group (by munged title) and, when Drupal
    records a parent publisher, creates the group-to-group Member row
    unless an active parent is already present.
    """
    from ckan import model
    from ckan.lib.munge import munge_title_to_name

    log = global_log
    pub = cls.get_cached_publisher_details(publisher_nid)
    title = pub["title"].strip()
    slug = munge_title_to_name(title)
    g = model.Group.get(slug)
    if g:
        log.info("Found publisher in db: %s", g.name)
    else:
        cls.status.record("Not found in CKAN db", slug, do_print=False)
        log.warn("Ignoring publisher that cannot be found in db: %s", slug)
        return
    if pub.get("parent_node"):
        parent_pub_title = cls.get_cached_publisher_details(pub["parent_node"])["title"]
        parent_name = munge_title_to_name(parent_pub_title)
        parent = model.Group.get(parent_name)
        if not parent:
            cls.status.record("Cannot find parent in CKAN db", g.name, do_print=False)
            # BUG FIX: `pub` is a dict, so `pub.name` raised AttributeError;
            # log the child group's name instead.
            log.warning("Cannot find parent %s of %s", parent_name, g.name)
            return
        existing_parents = [
            m.group
            for m in model.Session.query(model.Member)
            .filter(model.Member.table_name == "group")
            .filter(model.Member.table_id == g.id)
            .filter(model.Member.state == "active")
        ]
        if existing_parents:
            if len(existing_parents) > 1:
                log.warn("Multiple parents for %s: %r",
                         g.name, [p.name for p in existing_parents])
            if parent in existing_parents:
                cls.status.record("Correct parent already", g.name, do_print=False)
                log.info("Correct parent already: %s parent of %s",
                         parent.name, g.name)
                return
            else:
                cls.status.record("Has another parent", g.name, do_print=False)
                log.info(
                    "Has another parent: %r (instead of %s) parent of %s",
                    [p.name for p in existing_parents],
                    parent.name,
                    g.name,
                )
                return
        m = model.Member(group=parent, table_id=g.id, table_name="group")
        model.Session.add(m)
        model.Session.commit()
        cls.status.record("Parent added", slug, do_print=False)
        log.info("%s is made parent of %s", parent.name, g.name)
    else:
        log.info("%s has no parent in Drupal" % g.name)
        cls.status.record("Has no parent in Drupal", g.name, do_print=False)
def add_publisher(cls, publisher_nid):
    """Create or refresh a CKAN publisher group for a Drupal node id.

    Returns the Group object, or None when the publisher is on the
    ignore list. Recursively ensures the parent publisher exists and
    links it via a group-to-group Member row.
    """
    from ckan import model
    from ckan.lib.munge import munge_title_to_name

    if int(publisher_nid) in ignore_publishers:
        global_log.info('Publisher ignored: %s (%s)', publisher_nid,
                        cls.get_cached_publisher_details(publisher_nid))
        return

    pub = cls.get_cached_publisher_details(publisher_nid)
    title = pub['title'].strip()
    slug = munge_title_to_name(title)
    g = model.Group.get(slug)
    if g:
        global_log.info('Publisher already exists in db: %s', slug)
    else:
        g = model.Group(name=slug)
        model.Session.add(g)

    # Update core fields and contact/FOI extras from the Drupal record.
    g.title = title
    g.type = 'publisher'
    g.description = pub['body']
    field_pub_web_title = pub['field_pub_web'][0]['title'] if pub['field_pub_web'] else ''
    g.extras['contact-name'] = '%s contact' % field_pub_web_title if field_pub_web_title else ''
    g.extras['contact-email'] = pub['field_pub_email_display'][0]['email'] if pub['field_pub_email_display'] else ''
    g.extras['contact-phone'] = ''
    g.extras['foi-name'] = ''
    g.extras['foi-email'] = ''
    g.extras['foi-web'] = ''
    g.extras['foi-phone'] = ''
    acronym = pub['field_acronym'][0]['value'] if pub['field_acronym'] else ''
    g.extras['abbreviation'] = acronym or ''
    g.extras['website-url'] = (pub['field_pub_web'][0]['url'] or '') if pub['field_pub_web'] else ''
    g.extras['website-name'] = (pub['field_pub_web'][0]['title'] or '') if pub['field_pub_web'] else ''
    model.Session.commit()
    title_and_abbreviation = '%s (%s)' % (title, acronym) if acronym else title
    global_log.info('Added/edited publisher: %s <%s>',
                    title_and_abbreviation, publisher_nid)

    if pub.get('parent_node'):
        parent_pub_title = cls.get_cached_publisher_details(pub['parent_node'])['title']
        parent = model.Group.get(munge_title_to_name(parent_pub_title))
        if not parent:
            # Parent not in CKAN yet — create it first (recursive).
            parent = cls.add_publisher(pub['parent_node'])
        if model.Session.query(model.Member).\
                filter(model.Member.group == parent).\
                filter(model.Member.table_id == g.id).count() == 0:
            m = model.Member(group=parent, table_id=g.id, table_name='group')
            model.Session.add(m)
            # BUG FIX: the original format string was broken across a
            # source line ('%s is parent \n of %s'); reconstructed here.
            global_log.info('%s is parent of %s', parent.name, g.name)
        else:
            global_log.info('%s is already a parent of %s', parent.name, g.name)
        model.Session.commit()
    return g
def do_publisher(cls, publisher_nid):
    """Link a publisher group to its Drupal parent in CKAN.

    Duplicate of the double-quoted variant above; same fix applied.
    """
    from ckan import model
    from ckan.lib.munge import munge_title_to_name

    log = global_log
    pub = cls.get_cached_publisher_details(publisher_nid)
    title = pub['title'].strip()
    slug = munge_title_to_name(title)
    g = model.Group.get(slug)
    if g:
        log.info('Found publisher in db: %s', g.name)
    else:
        cls.status.record('Not found in CKAN db', slug, do_print=False)
        log.warn('Ignoring publisher that cannot be found in db: %s', slug)
        return
    if pub.get('parent_node'):
        parent_pub_title = cls.get_cached_publisher_details(pub['parent_node'])['title']
        parent_name = munge_title_to_name(parent_pub_title)
        parent = model.Group.get(parent_name)
        if not parent:
            cls.status.record('Cannot find parent in CKAN db', g.name, do_print=False)
            # BUG FIX: `pub` is a dict, so `pub.name` raised AttributeError;
            # log the child group's name instead.
            log.warning('Cannot find parent %s of %s', parent_name, g.name)
            return
        existing_parents = [m.group for m in
                            model.Session.query(model.Member).
                            filter(model.Member.table_name == 'group').
                            filter(model.Member.table_id == g.id).
                            filter(model.Member.state == 'active')]
        if existing_parents:
            if len(existing_parents) > 1:
                log.warn('Multiple parents for %s: %r',
                         g.name, [p.name for p in existing_parents])
            if parent in existing_parents:
                cls.status.record('Correct parent already', g.name, do_print=False)
                log.info('Correct parent already: %s parent of %s',
                         parent.name, g.name)
                return
            else:
                cls.status.record('Has another parent', g.name, do_print=False)
                log.info('Has another parent: %r (instead of %s) parent of %s',
                         [p.name for p in existing_parents],
                         parent.name, g.name)
                return
        m = model.Member(group=parent, table_id=g.id, table_name='group')
        model.Session.add(m)
        model.Session.commit()
        cls.status.record('Parent added', slug, do_print=False)
        log.info('%s is made parent of %s', parent.name, g.name)
    else:
        log.info('%s has no parent in Drupal' % g.name)
        cls.status.record('Has no parent in Drupal', g.name, do_print=False)
def _create_or_update_organization(self, data_dict, harvest_job):
    """Create, patch or delete a CKAN organization from harvested data.

    Returns the organization dict, or None when the remote record is
    flagged as removed.
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': self._get_user_name(),
        'ignore_auth': True,
    }
    # Look the organization up first; NotFound means we must create it.
    try:
        org = p.toolkit.get_action('organization_show')(
            context, {'id': data_dict['id']})
    except NotFound:
        org = None

    org_data = {
        'title': data_dict['name'],
        'name': munge_title_to_name(data_dict['name']),
        'id': data_dict['id'],
    }

    if org is None:
        log.info("Organization %s not found, creating...", data_dict['name'])
        if data_dict['removed']:
            log.info("Organization was removed, not creating..")
            return None
        # Get rid of auth audit on the context otherwise we'll get an
        # exception
        context.pop('__auth_audit', None)
        org = p.toolkit.get_action('organization_create')(context, org_data)
    else:
        log.info("found %s", org)
        if data_dict['removed']:
            log.info("Organization was removed, removing from catalog..")
            p.toolkit.get_action('organization_delete')(context, org)
            return None
        if self.config.get('force_all', False) is True:
            last_time = "2011-01-01"
        else:
            last_time = self._last_error_free_job_time(harvest_job)
        # Patch only when the remote record changed since our last run.
        if last_time and last_time < data_dict['changed']:
            org = p.toolkit.get_action('organization_patch')(context, org_data)

    log.info(org)
    return org
def _gen_new_name(cls, title, existing_name=None, append_type='number-sequence'):
    '''Return a URL-friendly dataset name derived from the title.

    When the ideal name is taken, a suffix is appended to make it
    unique. Pass *existing_name* when renaming an existing dataset so
    the current name can be kept if it still fits.

    :param existing_name: the current name of the dataset - only specify
        this if the dataset exists
    :type existing_name: string
    :param append_type: the type of characters to add to make it unique -
        either 'number-sequence' or 'random-hex'.
    :type append_type: string
    '''
    candidate = re.sub('-+', '-', munge_title_to_name(title))
    return cls._ensure_name_is_unique(candidate,
                                      existing_name=existing_name,
                                      append_type=append_type)
def _gen_new_name(cls, title, existing_name=None, append_type=None):
    '''Return a URL-friendly dataset name derived from the title.

    When the ideal name is taken, a suffix is appended to make it
    unique. *append_type* defaults to the
    ``ckanext.harvest.default_dataset_name_append`` config option, or
    'number-sequence' when that is unset.

    :param existing_name: the current name of the dataset - only specify
        this if the dataset exists
    :type existing_name: string
    :param append_type: the type of characters to add to make it unique -
        either 'number-sequence' or 'random-hex'.
    :type append_type: string
    '''
    # Explicit argument wins; otherwise fall back to the configured default.
    chosen_append = append_type or config.get(
        'ckanext.harvest.default_dataset_name_append', 'number-sequence')
    candidate = re.sub('-+', '-', munge_title_to_name(title))
    return cls._ensure_name_is_unique(candidate,
                                      existing_name=existing_name,
                                      append_type=chosen_append)
def organization_import(data):
    """Import organizations from a JSON configuration string.

    ``data`` is a JSON document with a ``url`` key pointing at a JSON
    list of organizations (plain title strings or objects) and an
    optional ``public_organization`` flag. Existing organizations are
    left untouched; missing ones are created.
    """
    _load_config()
    context = _create_context()
    configuration = simplejson.loads(data)
    data_url = configuration.get('url')
    public_organization = configuration.get('public_organization', False)
    with closing(urllib2.urlopen(data_url)) as source:
        data = simplejson.load(source)
    for item in data:
        values = {}
        if isinstance(item, basestring):
            # Bare string: treat it as the organization title.
            values['title'] = item.strip()
            values['name'] = munge_title_to_name(values['title']).lower()
        else:
            values['name'] = item.pop('name')
            values['title'] = item.pop('title')
            values['description'] = item.pop('description', None)
            # Any remaining keys become CKAN extras.
            values['extras'] = [{'key': key, 'value': value}
                                for key, value in item.iteritems()]
        values['id'] = values['name']
        if public_organization:
            # NOTE(review): this overwrites any extras collected above —
            # confirm that discarding them is intended.
            values['extras'] = [{'key': 'public_adminstration_organization',
                                 'value': 'true'}]
        try:
            get_action('organization_show')(context, {'id': values['id']})
            # Do not override organizations
        except NotFound:
            get_action('organization_create')(context, values)
def make_package_name(self, title, exclude_existing_package):
    '''Return a URL-friendly package name for *title*.

    Reuses the existing package's name when updating, and appends a
    short random suffix when the slug is taken by another package.
    '''
    slug = munge_title_to_name(title).replace('_', '-')
    while '--' in slug:
        slug = slug.replace('--', '-')
    slug = slug[0:90]  # max length is 100

    # Is the slug taken by a *different* package?
    clash = (Session.query(Package)
             .filter(Package.name == slug)
             .filter(Package.id != exclude_existing_package)
             .first())
    if not clash:
        # The name is available. Note that if we're updating an existing
        # package its URL changes, so incoming links may break.
        return slug

    if exclude_existing_package:
        # Updating: keep the package's current name to avoid churning
        # the URL with a fresh random suffix on every update.
        existing = (Session.query(Package)
                    .filter(Package.id == exclude_existing_package)
                    .first())
        if existing:
            # May be absent when we were handed the desired GUID before
            # the package is instantiated.
            return existing.name

    # Append random text; five characters should avoid collisions.
    return slug + "-" + str(uuid.uuid4())[:5]
def set_dataset_info(self, package, dataset, harvester_config):
    """Populate *package* (a CKAN package dict) from a CMS *dataset*
    record.

    ``harvester_config`` is accepted for interface compatibility but is
    not used here.
    """
    extra(package, "Agency", "Department of Health & Human Services")
    package["author"] = "Centers for Medicare & Medicaid Services"
    extra(package, "author_id", "http://healthdata.gov/id/agency/cms")
    extra(package, "Bureau Code", "009:38")
    package["title"] = dataset["Name"].strip()
    package["notes"] = dataset.get("Description")
    package["url"] = dataset.get("Address")
    dataset_hd = dataset["HealthData"]
    extra(package, "Date Released", parsedate(dataset_hd.get("DateReleased")))
    extra(package, "Date Updated", parsedate(dataset_hd.get("DateUpdated")))
    extra(package, "Agency Program URL", dataset_hd.get("AgencyProgramURL"))
    extra(package, "Subject Area 1", "Medicare")
    extra(package, "Unit of Analysis", dataset_hd.get("UnitOfAnalysis"))
    extra(package, "Data Dictionary", dataset_hd.get("DataDictionaryURL"))
    # NOTE(review): the next two lookups use spaced keys ("Coverage
    # Period", "Collection Frequency") while every other HealthData key
    # is CamelCase — confirm against the source feed's schema.
    extra(package, "Coverage Period", dataset_hd.get("Coverage Period"))
    extra(package, "Collection Frequency", dataset_hd.get("Collection Frequency"))
    extra(package, "Geographic Scope", dataset_hd.get("GeographicScope"))
    #extra(package, "Contact Name", dataset_hd.get("GenericContactName", None) or dataset_hd.get("ContactName"))
    # 'X or Y' syntax returns Y if X is either None or the empty string
    #extra(package, "Contact Email", dataset_hd.get("GenericContactEmail", None) or dataset_hd.get("ContactEmail"))
    extra(package, "License Agreement", dataset_hd.get("DataLicenseAgreementURL"))
    from ckan.lib.munge import munge_title_to_name
    package["tags"] = [{"name": munge_title_to_name(t["Name"])}
                       for t in dataset.get("Keywords", [])]
def migrate(self):
    '''Migrate CKAN Related Items to Showcases.

    Preflight: all related items must have unique titles. For each
    related item without an existing showcase, create a showcase and,
    when the item is attached to a dataset, the package association.
    '''
    related_items = get_action('related_list')(data_dict={})
    # preflight:
    # related items must have unique titles before migration
    related_titles = [i['title'] for i in related_items]
    # make a list of duplicate titles
    duplicate_titles = self._find_duplicates(related_titles)
    if duplicate_titles:
        print(
            """All Related Items must have unique titles before migration.
The following Related Item titles are used more than once and need to
be corrected before migration can continue. Please correct and try
again:"""
        )
        for i in duplicate_titles:
            print(i)
        return
    for related in related_items:
        # Skip items already migrated (marked via original_related_item_id).
        existing_showcase = get_action('package_search')(
            data_dict={'fq': '+dataset_type:showcase original_related_item_id:{0}'.format(related['id'])})
        normalized_title = substitute_ascii_equivalents(related['title'])
        if existing_showcase['count'] > 0:
            print('Showcase for Related Item "{0}" already exists.'.format(
                normalized_title))
        else:
            data_dict = {
                'original_related_item_id': related.get('id'),
                'title': related.get('title'),
                'name': munge_title_to_name(related.get('title')),
                'notes': related.get('description'),
                'image_url': related.get('image_url'),
                'url': related.get('url'),
                'tags': [{"name": related.get('type').lower()}]
            }
            # make the showcase
            try:
                new_showcase = get_action('ckanext_showcase_create')(
                    data_dict=data_dict)
            except Exception as e:
                print('There was a problem migrating "{0}": {1}'.format(
                    normalized_title, e))
            else:
                print('Created Showcase from the Related Item "{0}"'.format(normalized_title))
                # make the showcase_package_association, if needed
                try:
                    related_pkg_id = self._get_related_dataset(
                        related['id'])
                    if related_pkg_id:
                        get_action('ckanext_showcase_package_association_create')(
                            data_dict={'showcase_id': new_showcase['id'],
                                       'package_id': related_pkg_id})
                except Exception as e:
                    print('There was a problem creating the showcase_package_association for "{0}": {1}'.format(
                        normalized_title, e))
def _gen_new_name(self, title):
    '''Create a URL friendly name from a title.'''
    slug = munge_title_to_name(title).replace('_', '-')
    # Collapse any runs of dashes left by the substitution above.
    while '--' in slug:
        slug = slug.replace('--', '-')
    return slug
def _gen_new_title(self, title, related_id):
    """Return *title*, prefixed to make it unique when the munged name
    is already taken by an existing package.

    :returns: 'duplicate_<title>_<related_id>' on a clash, else *title*.
    """
    name = munge_title_to_name(title)
    pkg_obj = model.Session.query(model.Package).filter_by(name=name).first()
    if pkg_obj:
        # BUG FIX: str.replace returns a new string; the original
        # discarded the result, so an already-prefixed title would gain
        # a second 'duplicate_' prefix.
        title = title.replace('duplicate_', '')
        return 'duplicate_' + title + '_' + related_id
    else:
        return title
def create_organization_dict(self, inventory_id, title):
    """Build the payload dict for creating an inventory organization."""
    org = {
        "title": title,
        "name": munge_title_to_name(title),
        "inventory_organization_id": inventory_id,
    }
    # Mark it as a CKAN organization rather than a plain group.
    org["is_organization"] = True
    org["type"] = "organization"
    return org
def setup_class(cls):
    # Create one test package per x-range fixture, each carrying a
    # GeoJSON 'spatial' extra built from the fixture's bbox.
    SpatialTestBase.setup_class()
    for x in cls.fixtures_x:
        geojson = bbox_2_geojson(cls.x_values_to_bbox(x))
        cls.create_package(name=munge_title_to_name(str(x)),
                           title=str(x),
                           extras=[{'key': 'spatial', 'value': geojson}])
def import_stage(self, harvest_object):
    """Import a harvested Swisstopo record into CKAN.

    Creates/updates the package, assigns groups and an organization,
    stores the license URL as an extra and submits term translations.
    Returns False when no harvest object was received; re-raises any
    exception after logging it.
    """
    log.debug('In SwisstopoHarvester import_stage')
    if not harvest_object:
        log.error('No harvest object received')
        return False
    try:
        package_dict = json.loads(harvest_object.content)
        package_dict['id'] = harvest_object.guid
        package_dict['name'] = munge_title_to_name(
            package_dict['layer_name']
        )
        user = model.User.get(self.HARVEST_USER)
        context = {
            'model': model,
            'session': Session,
            'user': self.HARVEST_USER
        }
        # Find or create group the dataset should get assigned to
        package_dict['groups'] = self._find_or_create_groups(context)
        # Find or create the organization
        # the dataset should get assigned to
        package_dict['owner_org'] = self._find_or_create_organization(
            context,
            package_dict
        )
        # Save license url in extras
        extras = []
        if 'license_url' in package_dict:
            extras.append(('license_url', package_dict['license_url']))
        package_dict['extras'] = extras
        # Grant the harvest user admin rights on the package.
        package = model.Package.get(package_dict['id'])
        model.PackageRole(
            package=package,
            user=user,
            role=model.Role.ADMIN
        )
        log.debug(
            'Save or update package %s (%s)' % (
                package_dict['name'],
                package_dict['id']
            )
        )
        self._create_or_update_package(package_dict, harvest_object)
        log.debug('Save or update term translations')
        self._submit_term_translations(context, package_dict)
        Session.commit()
    except Exception, e:  # Python 2 syntax kept as-is
        log.exception(e)
        raise
def gen_new_name(title):
    '''Slugify *title*; append random hex when the name is taken.'''
    slug = munge_title_to_name(title).replace('_', '-')
    while '--' in slug:
        slug = slug.replace('--', '-')
    taken = Session.query(Package).filter(Package.name == slug).first()
    return slug + str(uuid.uuid4())[:5] if taken else slug
def gen_new_name(title):
    """Return a unique, URL-friendly name derived from *title*."""
    candidate = munge_title_to_name(title).replace("_", "-")
    while "--" in candidate:
        candidate = candidate.replace("--", "-")
    # Free slug: use it as-is; otherwise uniquify with random hex.
    if Session.query(Package).filter(Package.name == candidate).first() is None:
        return candidate
    return candidate + str(uuid.uuid4())[:5]
def _find_or_create_organization(self, context):
    """Return the id of the configured organization, creating it when
    the lookup fails."""
    try:
        data_dict = {
            'permission': 'edit_group',
            'id': munge_title_to_name(self.ORGANIZATION[u'de']['name']),
            'name': munge_title_to_name(self.ORGANIZATION[u'de']['name']),
            'title': self.ORGANIZATION[u'de']['name'],
            'description': self.ORGANIZATION[u'de']['description'],
            'extras': [
                {
                    'key': 'website',
                    'value': self.ORGANIZATION[u'de']['website']
                }
            ]
        }
        organization = get_action('organization_show')(context, data_dict)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any lookup failure falls through to create.
        organization = get_action('organization_create')(context, data_dict)
    return organization['id']
def get_clean_name(s):
    """Return a munged name derived from *s* that no existing package
    uses, appending an incrementing counter until one is free."""
    candidate = s
    suffix = 1
    while True:
        candidate = munge.munge_title_to_name(candidate)
        if not _get_package(client, candidate):
            return candidate
        # Taken — try the next numbered variant of the original string.
        candidate = "{0}_{1}".format(s, suffix)
        suffix = suffix + 1
def import_stage(self, harvest_object):
    '''
    The import stage will receive a HarvestObject object and will be
    responsible for:
        - performing any necessary action with the fetched object (e.g
          create a CKAN package).
          Note: if this stage creates or updates a package, a reference
          to the package must be added to the HarvestObject.
          Additionally, the HarvestObject must be flagged as current.
        - creating the HarvestObject - Package relation (if necessary)
        - creating and storing any suitable HarvestObjectErrors that may
          occur.
        - returning True if everything went as expected, False otherwise.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found
    '''
    logger.debug("in import stage: %s" % harvest_object.guid)
    if not harvest_object:
        logger.error('No harvest object received')
        self._save_object_error('No harvest object received')
        return False
    try:
        self._set_config(harvest_object.job.source.config)
        context = {'model': model, 'session': Session, 'user': self.user}
        package_dict = json.loads(harvest_object.content)
        package_dict['id'] = munge_title_to_name(harvest_object.guid)
        package_dict['name'] = package_dict['id']
        # add owner_org from the harvest source's dataset
        source_dataset = get_action('package_show')(
            {'ignore_auth': True},
            {'id': harvest_object.source.id})
        owner_org = source_dataset.get('owner_org')
        package_dict['owner_org'] = owner_org
        # logger.debug('Create/update package using dict: %s' % package_dict)
        self._create_or_update_package(package_dict, harvest_object,
                                       'package_show')
        Session.commit()
        logger.debug("Finished record")
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; errors are recorded against the object.
        logger.exception('Something went wrong!')
        self._save_object_error('Exception in import stage', harvest_object)
        return False
    return True
def create_slug(self):
    """Return a JSON response with a munged slug for the requested
    title and whether that slug is still available."""
    title = request.params.get('title') or ''
    name = munge_title_to_name(title)
    valid = not package_exists(name)
    #response.content_type = 'application/javascript'
    response_data = dict(name=name.replace('_', '-'), valid=valid)
    return self._finish_ok(response_data)
def set_extras(self, package, extras):
    """Merge *extras* into the package dict.

    Core CKAN fields are assigned directly, tags are munged into valid
    tag names, everything else is stored as a package extra.
    """
    core_fields = ("title", "notes", "author", "url")
    for key, value in extras.items():
        if key in core_fields:
            package[key] = value
        elif key == "tags":
            package["tags"] = [{"name": munge_title_to_name(t)}
                               for t in value]
        else:
            DatasetHarvesterBase.set_extra(package, key, value)
def _validate_package_id(self, package_id):
    """Reject package ids containing HTML angle brackets; otherwise
    return the munged id."""
    if re.search('[<>]+', package_id):
        log.debug(
            'Package id %s contains disallowed characters' % package_id
        )
        return False
    return munge_title_to_name(package_id)
def set_extras(self, package, extras):
    """Route harvested *extras* into the package dict by key kind."""
    for field, val in extras.items():
        if field == "tags":
            # Tags get munged into CKAN-safe tag names.
            package["tags"] = [{"name": munge_title_to_name(t)} for t in val]
        elif field in ("title", "notes", "author", "url"):
            # Native CKAN package fields are set directly.
            package[field] = val
        else:
            # Everything else becomes a package extra.
            DatasetHarvesterBase.set_extra(package, field, val)
def setup_class(cls):
    # One package per fixture value, each with a GeoJSON spatial extra
    # derived from the fixture's bbox.
    SpatialTestBase.setup_class()
    for value in cls.fixtures_x:
        cls.create_package(
            name=munge_title_to_name(str(value)),
            title=str(value),
            extras=[{'key': 'spatial',
                     'value': bbox_2_geojson(cls.x_values_to_bbox(value))}],
        )
def _find_or_create_organization(self, context, package_dict):
    """Return the id of the organization owning the layer, creating the
    organization when the lookup fails."""
    org = self._find_owner(package_dict['layer_name'])
    try:
        name = self.ORGANIZATION[org]['de']['name']
        data_dict = {
            'permission': 'edit_group',
            'id': munge_title_to_name(name),
            'name': munge_title_to_name(name),
            'title': self.ORGANIZATION[org]['de']['name'],
            'description': self.ORGANIZATION[org]['de']['description'],
            'extras': [
                {
                    'key': 'website',
                    'value': self.ORGANIZATION[org]['de']['website']
                }
            ]
        }
        org = get_action('organization_show')(context, data_dict)
    except Exception:
        # FIX: was a bare `except:`. NOTE(review): if the ORGANIZATION
        # lookup itself raised, data_dict is undefined here and this line
        # raises NameError — confirm the owner key is always present.
        org = get_action('organization_create')(context, data_dict)
    return org['id']
def initial_data(self, spatial_clean_db):
    """Create one package per x-range fixture with a spatial extra."""
    for value in self.fixtures_x:
        label = six.text_type(value)
        spatial_extra = {
            "key": "spatial",
            "value": bbox_2_geojson(self.x_values_to_bbox(value)),
        }
        create_package(name=munge_title_to_name(label),
                       title=label,
                       extras=[spatial_extra])
def before_index(self, search_data):
    """Prepare a dataset dict for search indexing.

    Adds resource-level fields, a political_level facet and
    per-language title/description/keyword/text fields built from the
    multilingual validated data dict. Unsupported package types are
    returned unchanged.
    """
    if not self.is_supported_package_type(search_data):
        return search_data
    extract_title = LangToString('title')
    validated_dict = json.loads(search_data['validated_data_dict'])
    # log.debug(pprint.pformat(validated_dict))
    search_data['res_name'] = [extract_title(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_description'] = [LangToString('description')(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_format'] = self._prepare_formats_for_index(validated_dict[u'resources'])  # noqa
    search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in validated_dict[u'resources']]  # noqa
    search_data['title_string'] = extract_title(validated_dict)
    search_data['description'] = LangToString('description')(validated_dict)  # noqa
    if 'political_level' in validated_dict[u'organization']:
        search_data['political_level'] = validated_dict[u'organization'][u'political_level']  # noqa
    try:
        # index language-specific values (or it's fallback)
        text_field_items = {}
        for lang_code in get_langs():
            search_data['title_' + lang_code] = get_localized_value(
                validated_dict['title'], lang_code
            )
            search_data['title_string_' + lang_code] = munge_title_to_name(
                get_localized_value(validated_dict['title'], lang_code)
            )
            search_data['description_' + lang_code] = get_localized_value(
                validated_dict['description'], lang_code
            )
            search_data['keywords_' + lang_code] = get_localized_value(
                validated_dict['keywords'], lang_code
            )
            # Free-text field: description + keywords + resource titles
            # and descriptions that exist in this language.
            text_field_items['text_' + lang_code] = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
            text_field_items['text_' + lang_code].extend(search_data['keywords_' + lang_code])  # noqa
            text_field_items['text_' + lang_code].extend([r['title'][lang_code] for r in validated_dict['resources'] if r['title'][lang_code]])  # noqa
            text_field_items['text_' + lang_code].extend([r['description'][lang_code] for r in validated_dict['resources'] if r['description'][lang_code]])  # noqa
        # flatten values for text_* fields
        for key, value in text_field_items.iteritems():
            search_data[key] = ' '.join(value)
    except KeyError:
        # Missing language keys: keep whatever fields were built so far.
        pass
    # log.debug(pprint.pformat(search_data))
    return search_data
def _find_or_create_organization(self, package_dict, context):
    """Assign the configured organization to the package, creating the
    organization when the lookup fails."""
    # Find or create the organization the dataset should get assigned to.
    try:
        data_dict = {
            'id': munge_title_to_name(self.ORGANIZATION['de']),
        }
        package_dict['owner_org'] = get_action('organization_show')(
            context.copy(), data_dict
        )['id']
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any lookup failure falls through to create.
        data_dict = {
            'permission': 'edit_group',
            'id': munge_title_to_name(self.ORGANIZATION['de']),
            'name': munge_title_to_name(self.ORGANIZATION['de']),
            'title': self.ORGANIZATION['de']
        }
        organization = get_action('organization_create')(
            context.copy(), data_dict
        )
        package_dict['owner_org'] = organization['id']
def _find_or_create_organization(self, context):
    """Return the id of the configured organization, creating it when
    the lookup fails."""
    try:
        data_dict = {
            'permission': 'edit_group',
            'id': munge_title_to_name(self.ORGANIZATION[u'de']['name']),
            'name': munge_title_to_name(self.ORGANIZATION[u'de']['name']),
            'title': self.ORGANIZATION[u'de']['name'],
            'description': self.ORGANIZATION[u'de']['description'],
            'extras': [{
                'key': 'website',
                'value': self.ORGANIZATION[u'de']['website']
            }]
        }
        organization = get_action('organization_show')(context, data_dict)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any lookup failure falls through to create.
        organization = get_action('organization_create')(context, data_dict)
    return organization['id']
def _dropzone_get_groups(self, dataset_node):
    '''Get the groups from the node, normalize them and get the ids.'''
    categories = self._get(dataset_node, 'kategorie')
    if not categories:
        return []
    # The field is a comma-separated list of group titles.
    pairs = [(munge_title_to_name(t), t) for t in categories.split(', ')]
    return self._get_group_ids(pairs)
def before_index(self, search_data):
    """Enrich a dataset dict with multilingual search fields before
    indexing.

    Duplicate of the variant above (without the commented log lines);
    unsupported package types are returned unchanged.
    """
    if not self.is_supported_package_type(search_data):
        return search_data
    extract_title = LangToString('title')
    validated_dict = json.loads(search_data['validated_data_dict'])
    search_data['res_name'] = [extract_title(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_description'] = [LangToString('description')(r) for r in validated_dict[u'resources']]  # noqa
    search_data['res_format'] = self._prepare_formats_for_index(validated_dict[u'resources'])  # noqa
    search_data['res_rights'] = [simplify_terms_of_use(r['rights']) for r in validated_dict[u'resources']]  # noqa
    search_data['title_string'] = extract_title(validated_dict)
    search_data['description'] = LangToString('description')(validated_dict)  # noqa
    if 'political_level' in validated_dict[u'organization']:
        search_data['political_level'] = validated_dict[u'organization'][u'political_level']  # noqa
    try:
        # index language-specific values (or it's fallback)
        text_field_items = {}
        for lang_code in get_langs():
            search_data['title_' + lang_code] = get_localized_value(
                validated_dict['title'], lang_code
            )
            search_data['title_string_' + lang_code] = munge_title_to_name(
                get_localized_value(validated_dict['title'], lang_code)
            )
            search_data['description_' + lang_code] = get_localized_value(
                validated_dict['description'], lang_code
            )
            search_data['keywords_' + lang_code] = get_localized_value(
                validated_dict['keywords'], lang_code
            )
            # Free-text field: description + keywords + resource titles
            # and descriptions that exist in this language.
            text_field_items['text_' + lang_code] = [get_localized_value(validated_dict['description'], lang_code)]  # noqa
            text_field_items['text_' + lang_code].extend(search_data['keywords_' + lang_code])  # noqa
            text_field_items['text_' + lang_code].extend([r['title'][lang_code] for r in validated_dict['resources'] if r['title'][lang_code]])  # noqa
            text_field_items['text_' + lang_code].extend([r['description'][lang_code] for r in validated_dict['resources'] if r['description'][lang_code]])  # noqa
        # flatten values for text_* fields
        for key, value in text_field_items.iteritems():
            search_data[key] = ' '.join(value)
    except KeyError:
        # Missing language keys: keep whatever fields were built so far.
        pass
    return search_data
def make_package_name(self, title, exclude_existing_package):
    '''
    Creates a URL friendly name from a title; appends random characters
    when the name is already used by a different package.
    '''
    candidate = munge_title_to_name(title).replace('_', '-')
    while '--' in candidate:
        candidate = candidate.replace('--', '-')
    conflict = (Session.query(Package)
                .filter(Package.name == candidate)
                .filter(Package.id != exclude_existing_package)
                .first())
    if conflict:
        return candidate + str(uuid.uuid4())[:5]
    return candidate
def _find_or_create_groups(self, context):
    """Return a single-element list with the id of the configured group,
    creating the group when it does not exist yet."""
    group_name = self.GROUPS['de'][0]
    data_dict = {
        'id': group_name,
        'name': munge_title_to_name(group_name),
        'title': group_name
    }
    try:
        group = get_action('group_show')(context, data_dict)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Any lookup failure falls through to create.
        group = get_action('group_create')(context, data_dict)
        log.info('created the group ' + group['id'])
    group_ids = []
    group_ids.append(group['id'])
    return group_ids
def _find_or_create_groups(self, groups, context):
    """Return the ids of the given groups, creating any that do not
    exist yet."""
    log.debug("Group names: %s" % groups)
    group_ids = []
    for group_name in groups:
        data_dict = {"id": group_name,
                     "name": munge_title_to_name(group_name),
                     "title": group_name}
        try:
            group = get_action("group_show")(context, data_dict)
            log.info("found the group " + group["id"])
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt. Lookup failure falls through to create.
            group = get_action("group_create")(context, data_dict)
            log.info("created the group " + group["id"])
        group_ids.append(group["id"])
    log.debug("Group ids: %s" % group_ids)
    return group_ids
def _gen_new_name(self, title):
    '''
    Creates a URL friendly name from a title; adds some random
    characters at the end when the name is already in use.
    '''
    base = munge_title_to_name(title).replace('_', '-')
    while '--' in base:
        base = base.replace('--', '-')
    if Session.query(Package).filter(Package.name == base).first():
        return base + str(uuid.uuid4())[:5]
    return base
def make_package_name(self, title, exclude_existing_package, for_deletion):
    '''
    Creates a URL friendly name from a title; prefixes deleted datasets
    and adds random characters when the name is taken by another package.
    '''
    slug = munge_title_to_name(title).replace('_', '-')
    if for_deletion:
        slug = "deleted-" + slug
    while '--' in slug:
        slug = slug.replace('--', '-')
    slug = slug[0:90]  # max length is 100
    other = (Session.query(Package)
             .filter(Package.name == slug)
             .filter(Package.id != exclude_existing_package)
             .first())
    return slug + "-" + str(uuid.uuid4())[:5] if other else slug
def gen_new_name(self, title):
    '''
    Derive a unique package name from the title, appending an integer
    counter (1-100) when the plain slug is already taken.  Returns None
    if no free name could be found.
    '''
    base = munge_title_to_name(title).replace('_', '-')
    while '--' in base:
        base = base.replace('--', '-')
    like_q = u'%s%%' % base
    matches = Session.query(Package).filter(
        Package.name.ilike(like_q)).limit(100)
    taken = {pkg.name for pkg in matches}
    if base not in taken:
        return base
    for counter in range(1, 101):
        candidate = base + six.text_type(counter)
        if candidate not in taken:
            return candidate
    return None
def improve_pkg_dict(self, pkg_dict, params):
    '''
    Normalise a harvested package dict in place and return it.

    Ensures a valid ``name``, drops an empty ``url``, overrides ``id``
    with the name, and resolves the licence from *params* or config.

    :param pkg_dict: package dict to fix up (mutated in place)
    :param params: optional dict of harvester parameters
    :returns: the same (mutated) package dict
    '''
    if pkg_dict['name'] != '':
        pkg_dict['name'] = munge_name(pkg_dict['name']).replace('_', '-')
    else:
        pkg_dict['name'] = munge_title_to_name(pkg_dict['title'])
    if pkg_dict['url'] == '':
        pkg_dict.pop('url', None)
    # override the 'id' as this never matches the CKAN internal ID
    pkg_dict['id'] = pkg_dict['name']
    # BUG FIX: the original used params.get(license, None), i.e. looked up
    # the *builtin* ``license`` object as the key, so a licence supplied in
    # params was never found. Look up the string key instead.
    if params is not None and params.get('license') is not None:
        pkg_dict['license_id'] = params['license']
    else:
        pkg_dict['license_id'] = config.get('ckanext.ddi.default_license')
    return pkg_dict
def generate_name(data_dict):
    '''Generate a unique name based on the package's title and FIS-Broker guid.'''
    iso_values = data_dict['iso_values']
    package_dict = data_dict['package_dict']
    slug = re.sub('-+', '-', munge_title_to_name(package_dict['title']))
    # ensure we don't exceed the allowed name length of 100:
    # 91 chars of slug + '-' + first guid segment (100-len(guid_part)-1)
    slug = slug[:91].strip('-')
    guid_part = iso_values['guid'].split('-')[0]
    return "{0}-{1}".format(slug, guid_part)
def process(self, record):
    '''
    Ingest one Mendeley bibliography record as a CKAN 'publications'
    dataset, creating or updating the package as appropriate.

    :param record: parsed bibliography entry (dict-like, BibTeX fields)
    '''
    # (removed a dead no-op statement ``record = record`` from the original)
    data_dict = {
        'id': record['ID'],
        'title': record['title'].strip('{}'),
        'name': munge_title_to_name(record['ID'] + record['title']),
        'notes': record['abstract'],
        'harvest_source': 'MENDELEY',
        'creator': record['author'].replace(',', '').split(' and '),
        'tag_string': ','.join(
            munge_tag(tag) for tag in record['keywords'].split(',')),
        'owner_org': tk.config.get(
            'ckanext.ingestor.config.mendeley_bib.owner_org', 'iaea'),
        'type': 'publications'
    }
    # Collect whichever standard identifiers the record carries, in a
    # fixed order, as "scheme:value" strings.
    data_dict['identifier'] = [
        '{0}:{1}'.format(scheme, record[scheme])
        for scheme in ('doi', 'isbn', 'pmid') if scheme in record
    ]
    if 'editor' in record:
        data_dict['contributor'] = [record['editor']]
    if 'publisher' in record:
        data_dict['publisher'] = [record['publisher']]
    if 'language' in record:
        data_dict['language'] = [record['language']]
    data_dict['source'] = record.get('url')

    user = tk.get_action('get_site_user')({'ignore_auth': True})
    existing = model.Package.get(data_dict['id'])
    action = tk.get_action(
        'package_update' if existing else 'package_create')
    action({'ignore_auth': True, 'user': user['name']}, data_dict)
def get_harvested_package_dict(cls, harvest_object):
    '''
    Fetch the harvested package dict and convert its DKAN-isms into
    CKAN style; on failure, records an object error and returns None.

    :param harvest_object: HarvestObject being imported
    :returns: converted package dict, or None on error
    '''
    package = CKANHarvester.get_harvested_package_dict(harvest_object)
    # change the DKAN-isms into CKAN-style
    try:
        if 'extras' not in package:
            package['extras'] = {}
        if 'name' not in package:
            package['name'] = munge.munge_title_to_name(package['title'])
        if 'description' in package:
            package['notes'] = package['description']
        # Map the licence title back to a registered licence id.
        # (loop variable renamed from ``license`` to avoid shadowing the
        # builtin of that name)
        for licence in model.Package.get_license_register().values():
            if licence.title == package['license_title']:
                package['license_id'] = licence.id
                break
        else:
            package['license_id'] = 'notspecified'
        if 'resources' not in package:
            raise PackageDictError('Dataset has no resources')
        for resource in package['resources']:
            resource['description'] = resource['title']
            if 'revision_id' in resource:
                del resource['revision_id']
            if 'format' not in resource:
                resource['format'] = MIMETYPE_FORMATS.get(
                    resource.get('mimetype'), '')
        if 'private' in package:
            # DKAN appears to have datasets with private=True which are
            # still public: https://github.com/NuCivic/dkan/issues/950. If
            # they were really private then we'd not get be able to access
            # them, so assume they are not private.
            package['private'] = False
        return package
    except Exception as e:
        # Broad on purpose: any conversion failure is recorded against
        # the harvest object rather than aborting the whole job.
        cls._save_object_error(
            'Unable to get convert DKAN to CKAN package: %s' % e,
            harvest_object)
        return None
def _find_or_create_entity(self, entityType, entityNames, context):
    '''
    Resolve each name to an existing entity (group/organization etc.),
    creating any that are missing, and return the list of entity ids.
    '''
    log.debug(entityType + ' names: %s' % entityNames)
    entity_ids = []
    for entity_name in entityNames:
        data_dict = {
            'id': self._utf8_and_remove_diacritics(entity_name),
            'name': munge_title_to_name(entity_name),
            'title': entity_name
        }
        try:
            found = get_action(entityType + '_show')(context, data_dict)
            log.info('found the ' + entityType + ' with id' + found['id'])
        except Exception:
            found = self._create_entity(entityType, data_dict, context)
        entity_ids.append(found['id'])
    log.debug(entityType + ' ids: %s' % entity_ids)
    return entity_ids
def command(config_ini, nodepublisher_csv):
    # Entry point for the publisher-migration script: load the CKAN
    # config, read the node-id -> publisher-title CSV into the global
    # ``publishers`` mapping, then run the dataset/harvest updates.
    #
    # :param config_ini: path to the CKAN .ini config file
    # :param nodepublisher_csv: CSV file of (drupal node id, title) rows
    config_ini_filepath = os.path.abspath(config_ini)
    load_config(config_ini_filepath)
    engine = engine_from_config(config, 'sqlalchemy.')

    from ckan import model
    from ckan.lib.munge import munge_title_to_name

    logging.config.fileConfig(config_ini_filepath)
    log = logging.getLogger(os.path.basename(__file__))
    global global_log
    global_log = log

    model.init_model(engine)

    # Register a translator in this thread so that
    # the _() functions in logic layer can work
    from ckan.lib.cli import MockTranslator
    registry = Registry()
    registry.prepare()
    translator_obj = MockTranslator()
    registry.register(translator, translator_obj)

    model.repo.new_revision()

    log.info('Reading %s', nodepublisher_csv)
    # Populate the module-level ``publishers`` dict with munged names,
    # keyed by the (integer) drupal node id.
    with open(nodepublisher_csv, 'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            nid, title = row
            publishers[int(nid)] = munge_title_to_name(title)

    # Mappings where we are getting rid of duplicate publishers
    publishers[16268] = publishers[11408]  # UKSA -> ONS
    publishers[11606] = publishers[11408]  # ONS
    publishers[20054] = publishers[16248]  # Met Office
    publishers[33036] = publishers[15255]  # Windsor & Maidenhead
    publishers[32619] = publishers[33245]  # Monmouthshire
    publishers[12662] = publishers[11567]  # NHS

    update_datasets()
    generate_harvest_publishers()

    log.info('Warnings: %r', warnings)
def _find_or_create_groups(self, groups, context):
    '''
    Look up each group name, creating any groups that are missing, and
    return the list of group ids. A copy of the context is passed to
    each action call so one call cannot pollute the next.

    :param groups: iterable of group names
    :param context: action context dict (copied per call)
    :returns: list of group ids (one per input name)
    '''
    log.debug('Group names: %s' % groups)
    group_ids = []
    for group_name in groups:
        data_dict = {
            'id': group_name,
            'name': munge_title_to_name(group_name),
            'title': group_name
        }
        try:
            group = get_action('group_show')(context.copy(), data_dict)
            log.info('found the group ' + group['id'])
        except Exception:
            # Narrowed from a bare `except:` so that SystemExit and
            # KeyboardInterrupt are not swallowed; any lookup failure
            # still falls back to creating the group.
            group = get_action('group_create')(context.copy(), data_dict)
            log.info('created the group ' + group['id'])
        group_ids.append(group['id'])
    log.debug('Group ids: %s' % group_ids)
    return group_ids
def test_organization_import_update(self):
    """ Test updating organization import from file """
    organization_url = tools.get_organization_test_source()
    expected_titles = (u"Kainuun ty\u00f6- ja elinkeinotoimisto",
                       u"Lapin ty\u00f6- ja elinkeinotoimisto",
                       u"Suomen ymp\u00e4rist\u00f6keskus")
    # Run the import twice: plain, then with the public-organization flag.
    for extras in (False, True):
        payload = {'url': organization_url}
        if extras:
            payload['public_organization'] = True
        result = organization_import.apply((simplejson.dumps(payload), ))
        self.assert_true(result.successful())
        for title in expected_titles:
            organization = tests.call_action_api(
                self.app, 'organization_show',
                id=munge_title_to_name(title).lower())
            self.assert_equal(organization['title'], title)
            # We do not want this to be updated
            self.assert_true(
                'public_adminstration_organization' not in organization)
def validator(key, data, errors, context):
    '''
    Autogenerate the dataset name (URL) from a localised title field
    when no value was supplied by the user.
    '''
    if errors[key]:
        return
    value = data[key]
    # A real (non-missing, truthy) value was supplied: keep it.
    if value is not missing and value:
        return
    output = {}  # retained from the original; not used below
    source_field = field['autogeneration_field']
    if not source_field:
        source_field = DEFAULT_TITLE_FIELD
    log.debug('[csc_multilanguage_url] Creating field using the field %s',
              source_field)
    prefix = source_field + '-'
    extras = data.get(key[:-1] + ('__extras', ), {})
    # Preferred locales, in order: the configured autogeneration locale,
    # then the site default.
    locales = []
    if field['autogeneration_locale']:
        locales.append(field['autogeneration_locale'])
    default_locale = config.get('ckan.locale_default', 'es')
    if default_locale:
        locales.append(default_locale)
    for locale in locales:
        localised_title = extras.get(prefix + locale)
        if localised_title:
            data[key] = munge.munge_title_to_name(localised_title)
            log.debug('[csc_multilanguage_url] Created name "%s" for package'
                      ' from language %s', data[key], locale)
            break
    return
def set_dataset_info(self, package, dataset, dataset_defaults):
    # Populate *package* (a CKAN package dict) from a HealthData.gov
    # CMS *dataset* record, mostly via the ``extra()`` helper which
    # stores key/value pairs on the package.
    # NOTE(review): ``dataset_defaults`` is not used in this method —
    # confirm whether the base class / callers require the parameter.
    extra(package, "Agency", "Department of Health & Human Services")
    package["author"] = "Centers for Medicare & Medicaid Services"
    extra(package, "author_id", "http://healthdata.gov/id/agency/cms")
    extra(package, "Bureau Code", "009:38")
    package["title"] = dataset["Name"].strip()
    package["notes"] = dataset.get("Description")
    package["url"] = dataset.get("Address")
    dataset_hd = dataset["HealthData"]
    extra(package, "Date Released", parsedate(dataset_hd.get("DateReleased")))
    extra(package, "Date Updated", parsedate(dataset_hd.get("DateUpdated")))
    extra(package, "Agency Program URL", dataset_hd.get("AgencyProgramURL"))
    extra(package, "Subject Area 1", "Medicare")
    extra(package, "Unit of Analysis", dataset_hd.get("UnitOfAnalysis"))
    extra(package, "Data Dictionary", dataset_hd.get("DataDictionaryURL"))
    # NOTE(review): these two lookup keys contain spaces, unlike the
    # CamelCase keys above — verify against the upstream feed schema.
    extra(package, "Coverage Period", dataset_hd.get("Coverage Period"))
    extra(package, "Collection Frequency", dataset_hd.get("Collection Frequency"))
    extra(package, "Geographic Scope", dataset_hd.get("GeographicScope"))
    extra(
        package,
        "Contact Name",
        dataset_hd.get("GenericContactName", None) or dataset_hd.get("ContactName")
    )  # 'X or Y' syntax returns Y if X is either None or the empty string
    extra(
        package,
        "Contact Email",
        dataset_hd.get("GenericContactEmail", None) or dataset_hd.get("ContactEmail"))
    extra(package, "License Agreement", dataset_hd.get("DataLicenseAgreementURL"))
    from ckan.lib.munge import munge_title_to_name
    # Keywords become CKAN tags, slugified to be tag-safe.
    package["tags"] = [{
        "name": munge_title_to_name(t["Name"])
    } for t in dataset.get("Keywords", [])]
def test_organization_import(self):
    """ Test organization import """
    organization_url = tools.get_organization_test_source()
    data = simplejson.dumps({
        'url': organization_url,
        'public_organization': True
    })
    # Run the import twice to check it is repeatable.
    for _ in xrange(2):
        result = organization_import.apply((data, ))
        self.assert_true(result.successful())
    expected_titles = (u"Kainuun ty\u00f6- ja elinkeinotoimisto",
                       u"Lapin ty\u00f6- ja elinkeinotoimisto",
                       u"Suomen ymp\u00e4rist\u00f6keskus")
    for title in expected_titles:
        organization = tests.call_action_api(
            self.app, 'organization_show',
            id=munge_title_to_name(title).lower())
        self.assert_equal(organization['title'], title)
        public_org = 'false'
        for extra in organization['extras']:
            if extra['key'] == 'public_adminstration_organization':
                public_org = 'true'
        self.assert_equal(public_org, 'true')
def fetch_stage(self, harvest_object):
    '''
    Fetch stage: enrich the harvest object's JSON content with an ``id``
    (copied from ``identifier``) and a munged ``name``, then save it.
    Any failure is recorded as a HarvestObjectError.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found
    '''
    logger.debug("in fetch stage: %s" % harvest_object.guid)
    try:
        self._set_config(harvest_object.job.source.config)
        metadata = json.loads(harvest_object.content)
        metadata['id'] = metadata['identifier']
        # name = slugified title + id, capped at 99 chars (limit is 100)
        slug = munge_title_to_name(metadata['title'])
        metadata['name'] = (slug + metadata['id'])[:99]
        try:
            serialised = json.dumps(metadata)
        except Exception:
            logger.exception('Dumping the metadata failed!')
            self._save_object_error('Dumping the metadata failed!',
                                    harvest_object)
            return False
        harvest_object.content = serialised
        harvest_object.save()
    except Exception:
        logger.exception('Something went wrong!')
        self._save_object_error('Exception in fetch stage', harvest_object)
        return False
    return True
def get_initial_package(self, user_obj):
    """ Get the initial package from the kobo asset.
        Require the user to get the token and to validate ownership on the asset.
        Return a pkg_dict or raises an error """
    kobo_api = self.get_kobo_api(user_obj)
    asset = kobo_api.get_asset(self.kobo_asset_id)
    settings = asset['settings']
    extras = [
        {'key': 'kobo_asset_id', 'value': self.kobo_asset_id},
        {'key': 'kobo_owner', 'value': asset['owner__username']},
        {'key': 'kobo_sector', 'value': settings.get('sector')},
        {'key': 'kobo_country', 'value': settings.get('country')},
    ]
    return {
        'title': asset['name'],
        'name': munge_title_to_name(asset['name']),
        'notes': self._build_asset_notes(asset),
        'original_id': asset['uid'],
        'extras': extras,
    }
def make_package_name(self, title, exclude_existing_package, for_deletion):
    '''
    Creates a URL friendly name from a title. If that slug is already in
    use by a different package, either reuse the updating package's
    existing name or fall back to a random suffix.
    '''
    slug = munge_title_to_name(title).replace('_', '-')
    if for_deletion:
        slug = "deleted-" + slug
    while '--' in slug:
        slug = slug.replace('--', '-')
    slug = slug[:90]  # max length is 100

    # Is this slug taken by a *different* package than the one (if any)
    # we are updating?
    clash = (Session.query(Package)
             .filter(Package.name == slug)
             .filter(Package.id != exclude_existing_package)
             .first())
    if clash is None:
        # Slug is free. Note that when updating an existing package its
        # URL changes, so incoming links may break.
        return slug

    if exclude_existing_package:
        # Updating a package whose slug is taken: chances are its name
        # already carries a random suffix from last time, so reuse it and
        # avoid churning the URL with fresh random text on every update.
        existing = Session.query(Package).filter(
            Package.id == exclude_existing_package).first()
        if existing:
            # the package may not exist yet because we may be passed the
            # desired package GUID before a new package is instantiated
            return existing.name

    # Append some random text; with five characters a collision is
    # unlikely.
    return slug + "-" + str(uuid.uuid4())[:5]
def gather_stage(self, harvest_job):
    # Gather stage for the Zurich GIS harvester: fetch CSW metadata for
    # each hard-coded dataset in self.DATASETS, fix/enrich it, and queue
    # one HarvestObject per dataset.
    #
    # :param harvest_job: the current HarvestJob
    # :returns: list of HarvestObject ids for the fetch/import stages
    log.debug('In ZhGisHarvester gather_stage')
    ids = []
    # NOTE(review): dict.iteritems() makes this Python 2 only.
    for dataset_id, dataset in self.DATASETS.iteritems():
        csw = ckan_csw.ZhGisCkanMetadata()
        # .copy() so edits below don't mutate any cached metadata dict
        metadata = csw.get_ckan_metadata_by_id(dataset_id).copy()
        log.debug(metadata)

        # Fix metadata information
        metadata['name'] = munge_title_to_name(metadata['name'])
        metadata['service_type'] = (metadata['service_type'].replace(
            'OGC:', ''))

        # Enrich metadata with hardcoded values
        metadata['url'] = dataset['geolion_url']
        metadata['tags'].extend(dataset['tags'])
        metadata['translations'] = self._generate_term_translations()
        log.debug("Translations: %s" % metadata['translations'])
        metadata['resources'] = (
            self._generate_resource_dict_array(metadata))
        log.debug(metadata['resources'])
        metadata['license_id'] = self.LICENSE['name']
        metadata['license_url'] = self.LICENSE['url']

        # Queue the dataset for the fetch/import stages.
        obj = HarvestObject(guid=metadata['id'], job=harvest_job,
                            content=json.dumps(metadata))
        obj.save()
        log.debug('adding ' + metadata['name'] + ' to the queue')
        ids.append(obj.id)
    return ids
def run_create(self, context, data_dict, resources_sheet, archive):
    """Create the dataset, deriving a unique name from its title."""
    data_dict['name'] = munge_title_to_name(data_dict['title'])
    try:
        package_id_or_name_exists(data_dict['name'], context)
    except Invalid:
        # Invalid means the name is free — keep it as-is.
        pass
    else:
        # Name taken: probe name-0, name-1, ... until one is free.
        suffix = 0
        while True:
            candidate = '{0}-{1}'.format(data_dict['name'], suffix)
            try:
                package_id_or_name_exists(candidate, context)
            except Invalid:
                data_dict['name'] = candidate
                break
            suffix += 1
    result = self.create_dataset(context, data_dict, resources_sheet,
                                 archive)
    if result:
        h.flash_success('Dataset was created!')