Example #1
def update_package_schema():
    """
    Add our custom fields for validation from the form
    """
    schema = default_update_package_schema()
    _schema_update(schema, 'update')
    return schema
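Note: `_schema_update` is a project-specific helper that is not shown on this page. A minimal sketch of what such a helper might look like, assuming CKAN's stock `ignore_missing` validator and `convert_to_extras` converter (the field name is illustrative only):

from ckan.plugins import toolkit

def _schema_update(schema, action):
    # Hypothetical helper: attach converters for a custom extra field.
    # 'action' ('create' or 'update') could be used to vary the validators.
    ignore_missing = toolkit.get_validator('ignore_missing')
    convert_to_extras = toolkit.get_converter('convert_to_extras')
    schema.update({
        'custom_field': [ignore_missing, convert_to_extras],
    })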
Example #2
 def update_package_schema(self):
     schema = default_update_package_schema()
     schema.update({
         'vocab_tags': [ignore_missing,
                        convert_to_tags(TEST_VOCAB_NAME)],
     })
     return schema
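Note: an update schema for vocabulary tags usually has a read-side counterpart so the tags survive package_show. A hedged sketch of that counterpart, assuming the same TEST_VOCAB_NAME and CKAN's convert_from_tags converter:

 def show_package_schema(self):
     # Sketch (assumption): mirror of the update schema above;
     # convert_from_tags restores the vocabulary tags when reading.
     schema = default_show_package_schema()
     schema.update({
         'vocab_tags': [convert_from_tags(TEST_VOCAB_NAME), ignore_missing],
     })
     return schema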
Example #3
    def form_to_db_schema_options(self, options={}):
        context = options.get('context', {})
        schema = context.get('schema', None)
        if schema:
            return schema

        elif options.get('api'):
            if options.get('type') == 'create':
                return default_schema.default_create_package_schema()
            else:
                return default_schema.default_update_package_schema()

        schema = self.form_to_db_schema()

        # Sysadmins can save UKLP datasets with looser validation
        # constraints.  This is because UKLP datasets are created using
        # a custom schema passed in from the harvester.  However, when it
        # comes to re-saving the dataset via the dataset form, there are
        # some validation requirements we need to drop.  That's what this
        # section of code does.
        pkg = context.get('package')
        user = context.get('user', '')
        if Authorizer().is_sysadmin(unicode(user)) and \
           pkg and pkg.extras.get('UKLP', 'False') == 'True':
            schema.update(self._uklp_sysadmin_schema_updates)

        return schema
Example #4
def project_update_schema():
    schema = default_update_package_schema()
    schema.update(project_schema())
    schema.update({
        '__after': [update_cadasta_project],
    })
    return schema
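Note: the '__after' key registers schema-level validators that run once, after all per-field validators have finished. A sketch of the signature a validator such as update_cadasta_project would have (the body here is illustrative, not the project's actual logic):

def example_after_validator(key, data, errors, context):
    # '__after' validators receive the flattened data dict, keyed by
    # tuples such as ('name',), and may inspect or mutate it.
    if not data.get(('notes',)):
        data[('notes',)] = u'No description provided'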
Example #5
    def form_to_db_schema_options(self, options={}):
        context = options.get('context', {})
        schema = context.get('schema',None)
        if schema:
            return schema

        elif options.get('api'):
            if options.get('type') == 'create':
                return default_schema.default_create_package_schema()
            else:
                return default_schema.default_update_package_schema()

        schema = self.form_to_db_schema()
        # Sysadmins can save UKLP datasets with looser validation
        # constraints.  This is because UKLP datasets are created using
        # a custom schema passed in from the harvester.  However, when it
        # comes to re-saving the dataset via the dataset form, there are
        # some validation requirements we need to drop.  That's what this
        # section of code does.
        pkg = context.get('package')
        user = context.get('user', '')
        if Authorizer().is_sysadmin(unicode(user)) and \
           pkg and pkg.extras.get('UKLP', 'False') == 'True':
            schema.update(self._uklp_sysadmin_schema_updates)
        if Authorizer().is_sysadmin(unicode(user)) and \
           pkg and pkg.extras.get('external_reference') == 'ONSHUB':
            self._ons_sysadmin_schema_updates(schema)
        return schema
Example #6
    def setup(self):
        # Add sysadmin user
        harvest_user = model.User(name=u'harvest', password=u'test')
        model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
        Session.add(harvest_user)
        Session.commit()

        package_schema = default_update_package_schema()
        self.context = {'model': model,
                        'session': Session,
                        'user': u'harvest',
                        'schema': package_schema,
                        'api_version': '2'}

        if config.get('ckan.harvest.auth.profile') == u'publisher':
            # Create a publisher user
            rev = model.repo.new_revision()
            self.publisher_user = model.User(name=u'test-publisher-user', password=u'test')
            self.publisher = model.Group(name=u'test-publisher', title=u'Test Publisher', type=u'publisher')
            Session.add(self.publisher_user)
            Session.add(self.publisher)

            Session.commit()

            member = model.Member(table_name = 'user',
                             table_id = self.publisher_user.id,
                             group=self.publisher,
                             capacity='admin')
            Session.add(member)

            Session.commit()
Example #7
    def setup(self):

        # Add sysadmin user
        harvest_user = model.User(name=u"harvest", password=u"test")
        model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
        Session.add(harvest_user)
        Session.commit()

        package_schema = default_update_package_schema()
        self.context = {
            "model": model,
            "session": Session,
            "user": u"harvest",
            "schema": package_schema,
            "api_version": "2",
        }

        if config.get("ckan.harvest.auth.profile") == u"publisher":
            # Create a publisher user
            rev = model.repo.new_revision()
            self.publisher_user = model.User(name=u"test-publisher-user", password=u"test")
            self.publisher = model.Group(name=u"test-publisher", title=u"Test Publihser", type=u"publisher")
            Session.add(self.publisher_user)
            Session.add(self.publisher)

            Session.commit()

            member = model.Member(
                table_name="user", table_id=self.publisher_user.id, group=self.publisher, capacity="admin"
            )
            Session.add(member)

            Session.commit()
Example #8
def package_update_validate(context, data_dict):
    model = context['model']
    user = context['user']
    
    id = data_dict["id"]
    schema = context.get('schema') or default_update_package_schema()
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    data, errors = validate(data_dict, schema, context)


    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))
    return data
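Note: a hedged sketch of how an action like this might be called, assuming it has been registered as 'package_update_validate' via the IActions plugin interface:

import ckan.model as model
from ckan.logic import get_action

# Hypothetical call site: dry-run validation of an update without saving
# anything; the dataset id and field values are placeholders.
context = {'model': model, 'session': model.Session, 'user': u'editor'}
validated = get_action('package_update_validate')(
    context, {'id': 'some-dataset', 'notes': 'Updated description'})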
Example #10
def update_package_schema():
    schema = default_update_package_schema()

    _modify_schema(schema)
    name_validator = get_validator('name_validator')
    schema['name'] = [not_empty, unicode, trim_string(100), name_validator,
                      no_pending_dataset_with_same_name]

    return schema
Example #11
    def setup(self):
        print ("")
        print ("TestUM:setup() before each test method")

        # Add sysadmin user
        self.harvestUser = model.User(name=u'harvest', password=u'test', sysadmin=True)
        model.Session.add(self.harvestUser)
        model.Session.commit()

        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'xml/sample.xml',
            'source_type': u'ngds'
        }

        context = {
            'model': model,
            'session': model.Session,
            'user': u'harvest'
        }

        if config.get('ckan.harvest.auth.profile') == u'publisher' \
           and not 'publisher_id' in source_fixture:
            source_fixture['publisher_id'] = self.publisher.id

        source_dict=get_action('harvest_source_create')(context, source_fixture)
        self.oHarvestSource = HarvestSource.get(source_dict['id'])

        job_dict=get_action('harvest_job_create')(context,{'source_id': self.oHarvestSource.id})
        self.oHarvestJob = HarvestJob.get(job_dict['id'])

        context = {
            'model' : model,
            'session': model.Session,
            'ignore_auth': True,
        }

        data_dict = {
            'guid' : 'guid',
            'content' : self.contentDataset,
            'job_id' : self.oHarvestJob.id,
            'extras' : { 'a key' : 'a value' },
        }

        oHarvestObject = toolkit.get_action('harvest_object_create')(context, data_dict)
        self.oHarvestObject = HarvestObject.get(oHarvestObject['id'])

        package_schema = default_update_package_schema()
        self.context = {
            'model':model,
            'session': model.Session,
            'user':u'harvest',
            'schema':package_schema,
            'api_version': '2'
        }
Example #12
    def setup(self):
        # Add sysadmin user
        harvest_user = model.User(name=u'harvest', password=u'test', sysadmin=True)
        Session.add(harvest_user)
        Session.commit()

        package_schema = default_update_package_schema()
        self.context = {'model': model,
                        'session': Session,
                        'user': u'harvest',
                        'schema': package_schema,
                        'api_version': '2'}
Example #14
 def setup(self):
     super(FisbrokerTestBase, self).setup()
     reset_mock_server()
     # Add sysadmin user
     user_name = u'harvest'
     harvest_user = model.User(name=user_name, password=u'test', sysadmin=True)
     Session.add(harvest_user)
     Session.commit()
     package_schema = default_update_package_schema()
     self.context = {
         'model': model,
         'session': Session,
         'user': user_name,
         'schema': package_schema,
         'api_version': '2'
     }
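Note: setups like this one put the default update schema into self.context so later action calls bypass plugin-customized validation. A hedged sketch of a test body consuming that context (test name and dataset id are placeholders):

 def test_update_uses_default_schema(self):
     # Illustrative only: self.context carries the stock update schema,
     # so package_update validates against default CKAN rules.
     from ckan.plugins import toolkit
     pkg_dict = {'id': 'example-dataset', 'notes': u'refreshed by harvest'}
     toolkit.get_action('package_update')(self.context, pkg_dict)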
Example #15
    def package_update(self, context, data_dict):
        preview = context.get('preview', False)
        schema = context.get('schema') or default_update_package_schema()
        if preview:
            return
        if 'id' not in data_dict:
            raise NotFound
        result = self.engine.execute(
                select(
                    [self.package_table.c.nid],
                    or_(self.package_table.c.id == data_dict['id'],
                        self.package_table.c.name == data_dict['id'])
                )
        ).fetchone()
        if not result:
            raise NotFound
        nid = result['nid']
        data_dict['body'] = data_dict['notes']
        ## run through validate to make sure tags are in correct place
        data, errors = validate(data_dict, schema, context)
        terms = {}
        for num, tag in enumerate(data.get('tags', [])):
            terms[str(num)] = tag['name']
        data_dict['terms'] = terms

        if data_dict.get('state', 'active') == 'active':
            data_dict['status'] = '1'
        else:
            data_dict['status'] = '0'

        url = urlparse.urljoin(self.base_url, 'services/package/%s.json' % (nid))
        data = json.dumps({'data': data_dict})
        req = urllib2.Request(url, data, {'Content-type': 'application/json'})
        req.get_method = lambda: 'PUT'
        ##XXX think about error conditions a bit more
        f = urllib2.urlopen(req, None, 3)
        try:
            drupal_info = json.loads(f.read())
        finally:
            f.close()

        session = context['model'].Session
        context['nid'] = result['nid']
        package_update = update.package_update(context, data_dict)
        package_update['nid'] = result['nid']
        package_update['revision_message'] = '%s-%s'%(session.revision.id,session.revision.message)
        return package_update
Example #16
    def package_update(self, context, data_dict):
        preview = context.get('preview', False)
        schema = context.get('schema') or default_update_package_schema()
        if preview:
            return
        if 'id' not in data_dict:
            raise NotFound
        result = self.engine.execute(
            select(
                [self.package_table.c.nid],
                or_(self.package_table.c.id == data_dict['id'],
                    self.package_table.c.name == data_dict['id']))).fetchone()
        if not result:
            raise NotFound
        nid = result['nid']
        data_dict['body'] = data_dict['notes']
        ## run through validate to make sure tags are in correct place
        data, errors = validate(data_dict, schema, context)
        terms = {}
        for num, tag in enumerate(data.get('tags', [])):
            terms[str(num)] = tag['name']
        data_dict['terms'] = terms

        if data_dict.get('state', 'active') == 'active':
            data_dict['status'] = '1'
        else:
            data_dict['status'] = '0'

        url = urlparse.urljoin(self.base_url,
                               'services/package/%s.json' % (nid))
        data = json.dumps({'data': data_dict})
        req = urllib2.Request(url, data, {'Content-type': 'application/json'})
        req.get_method = lambda: 'PUT'
        ##XXX think about error conditions a bit more
        f = urllib2.urlopen(req, None, 3)
        try:
            drupal_info = json.loads(f.read())
        finally:
            f.close()

        session = context['model'].Session
        context['nid'] = result['nid']
        package_update = update.package_update(context, data_dict)
        package_update['nid'] = result['nid']
        package_update['revision_message'] = '%s-%s' % (
            session.revision.id, session.revision.message)
        return package_update
Example #17
 def validate(self, context, data_dict, schema, action):
     if action in ('package_update', 'package_create'):
         # If the caller to package_update specified a schema (e.g.
         # harvesters specify the default schema) then we don't want to
         # override that.
         if not context.get('schema'):
             if 'api_version' in context:
                 # When accessed by the API, just use the default schemas.
                 # It's only the forms that are customized to make it easier
                 # for humans.
                 if action == 'package_create':
                     schema = default_schema.default_create_package_schema()
                 else:
                     schema = default_schema.default_update_package_schema()
             else:
                 # Customized schema for DGU form
                 schema = self.form_to_db_schema_options(context)
     return toolkit.navl_validate(data_dict, schema, context)
Example #18
def package_update(context, data_dict):
    model = context['model']
    user = context['user']
    
    id = data_dict["id"]
    preview = context.get('preview', False)
    schema = context.get('schema') or default_update_package_schema()
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    check_access(pkg, model.Action.EDIT, context)

    data, errors = validate(data_dict, schema, context)

    check_group_auth(context, data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))

    if not preview:
        rev = model.repo.new_revision()
        rev.author = user
        if 'message' in context:
            rev.message = context['message']
        else:
            rev.message = _(u'REST API: Update object %s') % data.get("name")

    pkg = package_dict_save(data, context)

    if not preview:
        for item in PluginImplementations(IPackageController):
            item.edit(pkg)
        model.repo.commit()        
        return package_dictize(pkg, context)
    return data
Example #20
def package_update_schema():
    schema = default_update_package_schema()
    schema.update({
        'frequency_time_modifier':
        [ignore_missing, unicode, convert_to_extras],
        'frequency_count': [ignore_missing, convert_to_extras],
        'frequency_update_period':
        [ignore_missing, unicode, convert_to_extras],
        'frequency_period': [ignore_missing, unicode, convert_to_extras],
        # frequency is constructed from the other frequency_ fields
        'frequency': [ignore_missing],
        'retention_count':
        [ignore_missing, is_positive_integer, convert_to_extras],
        'retention_period': [ignore_missing, unicode, convert_to_extras],
        'delivery_unit': [ignore_missing, unicode, convert_to_extras],
        'service': [ignore_missing, unicode, convert_to_extras],
        'next_update': [ignore_missing, unicode, convert_to_extras],
        'review_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_start_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_end_date': [ignore_missing, unicode, convert_to_extras],
    })
    return schema
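Note: fields stored with convert_to_extras normally need a mirror-image show schema so they reappear as top-level keys on read. A hedged sketch for a few of the fields above, assuming CKAN's convert_from_extras converter:

def package_show_schema():
    # Sketch (assumption): read-side counterpart restoring selected extras.
    schema = default_show_package_schema()
    schema.update({
        'frequency_count': [convert_from_extras, ignore_missing],
        'retention_period': [convert_from_extras, ignore_missing],
        'delivery_unit': [convert_from_extras, ignore_missing],
    })
    return schema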
Example #21
def package_update(context, data_dict):
    model = context["model"]
    user = context["user"]

    id = data_dict["id"]
    schema = context.get("schema") or default_update_package_schema()
    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_("Package was not found."))
    data_dict["id"] = pkg.id

    check_access("package_update", context, data_dict)

    data, errors = validate(data_dict, schema, context)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Update object %s") % data.get("name")

    pkg = package_dict_save(data, context)

    for item in PluginImplementations(IPackageController):
        item.edit(pkg)
    if not context.get("defer_commit"):
        model.repo.commit()
    return package_dictize(pkg, context)
Example #22
def package_update_schema():
    schema = default_update_package_schema()
    schema.update({
        'frequency_time_modifier': [ignore_missing, unicode,
                                    convert_to_extras],
        'frequency_count': [ignore_missing, convert_to_extras],
        'frequency_update_period': [ignore_missing, unicode,
                                    convert_to_extras],
        'frequency_period': [ignore_missing, unicode, convert_to_extras],
        # frequency is constructed from the other frequency_ fields
        'frequency': [ignore_missing],

        'retention_count': [ignore_missing, is_positive_integer,
                            convert_to_extras],
        'retention_period': [ignore_missing, unicode, convert_to_extras],
        'delivery_unit': [ignore_missing, unicode, convert_to_extras],
        'service': [ignore_missing, unicode, convert_to_extras],
        'next_update': [ignore_missing, unicode, convert_to_extras],
        'review_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_start_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_end_date': [ignore_missing, unicode, convert_to_extras],
    })
    return schema
Example #23
    def setup(self):

        # Add sysadmin user
        harvest_user = model.User(name=u'harvest', password=u'test')
        model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
        Session.add(harvest_user)
        Session.commit()

        package_schema = default_update_package_schema()
        self.context = {
            'model': model,
            'session': Session,
            'user': u'harvest',
            'schema': package_schema,
            'api_version': '2'
        }

        if config.get('ckan.harvest.auth.profile') == u'publisher':
            # Create a publisher user
            rev = model.repo.new_revision()
            self.publisher_user = model.User(name=u'test-publisher-user',
                                             password=u'test')
            self.publisher = model.Group(name=u'test-publisher',
                                         title=u'Test Publisher',
                                         type=u'publisher')
            Session.add(self.publisher_user)
            Session.add(self.publisher)

            Session.commit()

            member = model.Member(table_name='user',
                                  table_id=self.publisher_user.id,
                                  group=self.publisher,
                                  capacity='admin')
            Session.add(member)

            Session.commit()
Example #24
    def test_clean_tags(self):
        
        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single',
            'owner_org': 'test-org',
            'metadata_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'metadata_modified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),

        }

        user = User.get('dummy')
        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('test-org')
        if org is None:
            org  = call_action('organization_create',
                                context={'user': user_name},
                                name='test-org')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g  = call_action('group_create',
                                      context={'user': user_name},
                                      name='existing-group')

        context = {'user': '******'} 
        package_schema = default_update_package_schema()
        context['schema'] = package_schema
        package_dict = {'frequency': 'manual',
              'publisher_name': 'dummy',
              'extras': [{'key':'theme', 'value':['non-mappable', 'thememap1']}],
              'groups': [],
              'title': 'fakename',
              'holder_name': 'dummy',
              'holder_identifier': 'dummy',
              'name': 'fakename',
              'notes': 'dummy',
              'owner_org': 'test-org',
              'modified': datetime.now(),
              'publisher_identifier': 'dummy',
              'metadata_created' : datetime.now(),
              'metadata_modified' : datetime.now(),
              'guid': unicode(uuid4()),
              'identifier': 'dummy'}
        
        package_data = call_action('package_create', context=context, **package_dict)

        package = Package.get('fakename')
        source, job = self._create_source_and_job(source_fixture)
        job.package = package
        job.guid = uuid4()
        harvester = SpatialHarvester()
        with open(os.path.join('..', 'data', 'dataset.json')) as f:
            dataset = json.load(f)

        # long tags are invalid in all cases
        TAG_LONG_INVALID = 'abcdefghij' * 20
        # if clean_tags is not set to true, tags will be truncated to 50 chars
        TAG_LONG_VALID = TAG_LONG_INVALID[:50]
        # default truncate to 100
        TAG_LONG_VALID_LONG = TAG_LONG_INVALID[:100]

        assert len(TAG_LONG_VALID) == 50
        assert TAG_LONG_VALID[-1] == 'j'
        TAG_CHARS_INVALID = '[email protected]!'
        TAG_CHARS_VALID = 'pretty-invlidtag'

        dataset['tags'].append(TAG_LONG_INVALID)
        dataset['tags'].append(TAG_CHARS_INVALID)

        harvester.source_config = {'clean_tags': False}
        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']

        # no clean tags, so invalid chars are in
        # but tags are truncated to 50 chars
        assert {'name': TAG_CHARS_VALID} not in tags
        assert {'name': TAG_CHARS_INVALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
        assert {'name': TAG_LONG_INVALID} not in tags

        harvester.source_config = {'clean_tags': True}

        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']
        assert {'name': TAG_CHARS_VALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
Example #26
class DCATdeMigrateCommand(tk.CkanCommand):
    '''
    Migrates CKAN datasets from OGD to DCAT-AP.de.

    Usage: dcatde_migrate [dry-run] [adms-id-migrate]
    Params:
        dry-run             If given, perform all migration tasks without saving. A full
                            log file is written.

        adms-id-migrate     If given, only migrate adms:identifier to dct:identifier for all affected
                            datasets.

        contributor-id-migrate If given, set a contributor-ID for all datasets without an ID.

    Connect with "nc -ul 5005" on the same machine to receive status updates.
    '''

    summary = __doc__.split('\n')[0]
    usage = __doc__

    UDP_IP = "127.0.0.1"
    UDP_PORT = 5005

    # constants for different migration modes
    MODE_OGD = 0
    MODE_ADMS_ID = 1
    MODE_CONTRIBUTOR_ID = 2

    dry_run = False
    migration_mode = MODE_OGD

    PACKAGE_UPDATE_SCHEMA = schema_.default_update_package_schema()

    def __init__(self, name):
        super(DCATdeMigrateCommand, self).__init__(name)
        self.executor = None  # initialized after config load
        try:
            email_validator = tk.get_validator('email_validator')
            self.PACKAGE_UPDATE_SCHEMA['maintainer_email'].remove(
                email_validator)
            self.PACKAGE_UPDATE_SCHEMA['author_email'].remove(email_validator)
        except (ValueError, UnknownValidator):
            pass

    def create_context(self):
        '''
        Creates new context.
        '''
        return {'model': model, 'ignore_auth': True}

    def command(self):
        '''
        Executes command.
        '''
        for cmd in self.args:
            if cmd == 'dry-run':
                self.dry_run = True
            elif cmd == 'adms-id-migrate':
                self.migration_mode = self.MODE_ADMS_ID
            elif cmd == 'contributor-id-migrate':
                self.migration_mode = self.MODE_CONTRIBUTOR_ID
            else:
                print 'Command %s not recognized' % cmd
                self.parser.print_usage()
                sys.exit(1)

        self._load_config()
        if self.migration_mode == self.MODE_ADMS_ID:
            self.migrate_adms_identifier()
        elif self.migration_mode == self.MODE_CONTRIBUTOR_ID:
            self.migrate_contributor_identifier()
        else:
            self.executor = migration_functions.MigrationFunctionExecutor(
                pylons.config.get('ckanext.dcatde.urls.license_mapping'),
                pylons.config.get('ckanext.dcatde.urls.category_mapping'))
            self.migrate_datasets()

    def migrate_datasets(self):
        '''
        Iterates over all datasets and migrates fields with 'migration_functions'
        '''
        # Check if all needed groups are present
        group_list = tk.get_action('group_list')
        if not self.executor.check_group_presence(
                group_list(self.create_context(), {})):
            return

        util.get_migrator_log().info('Starting dataset migration' + (
            ' [dry run without saving]' if self.dry_run else ''))

        # Change the type of all datasets to 'dataset' via DB query, as package_update() doesn't
        # allow to set the type
        if not self.dry_run:
            model.Session.query(model.Package)\
               .filter(or_((model.Package.type == "datensatz"),
                           (model.Package.type == "app"),
                           (model.Package.type == "dokument")))\
               .update({"type": u'dataset'})
            model.repo.commit()

        for dataset in self.iterate_local_datasets():
            self.executor.apply_to(dataset)

            self.update_dataset(dataset)

        util.get_migrator_log().info('Dataset migration finished' + (
            ' [dry run, did not save]' if self.dry_run else ''))

    def migrate_adms_identifier(self):
        util.get_migrator_log().info(
            'Migrating adms:identifier to dct:identifier' +
            (' [dry run without saving]' if self.dry_run else ''))

        for dataset in self.iterate_adms_id_datasets():
            # only migrate if dct:identifier is not already present
            if not dataset_utils.get_extras_field(dataset,
                                                  EXTRA_KEY_DCT_IDENTIFIER):
                util.rename_extras_field_migration(dataset,
                                                   EXTRA_KEY_ADMS_IDENTIFIER,
                                                   EXTRA_KEY_DCT_IDENTIFIER,
                                                   False)
                self.update_dataset(dataset)
            else:
                util.get_migrator_log().info(
                    '%sSkipping package as it already has a dct:identifier',
                    util.log_dataset_prefix(dataset))

        util.get_migrator_log().info(
            'Finished migration of adms:identifier to dct:identifier' +
            (' [dry run without saving]' if self.dry_run else ''))

    def migrate_contributor_identifier(self):
        ''' Add govdata-contributor-IDs to datasets that are missing one '''
        util.get_migrator_log().info('Migrating dcatde:contributorID' + (
            ' [dry run without saving]' if self.dry_run else ''))

        starttime = time.time()
        package_obj_to_update = gather_dataset_ids()
        endtime = time.time()
        print "INFO: %s datasets found to check for contributor-ID. Total time: %s." % \
              (len(package_obj_to_update), str(endtime - starttime))

        organization_list = tk.get_action('organization_list')(
            self.create_context(), {
                'all_fields': True,
                'include_extras': True
            })
        updated_count = created_count = 0
        starttime = time.time()

        for dataset in self.iterate_datasets(package_obj_to_update.keys()):
            print u'Updating dataset: {}'.format(dataset['title'])

            dataset_org_id = dataset['organization']['id']
            dataset_org = next((item for item in organization_list
                                if item['id'] == dataset_org_id), None)
            if not dataset_org:
                print u'Did not find a Organization for ID: ' + dataset_org_id
                continue

            org_contributor_field = get_extras_field(dataset_org,
                                                     EXTRA_KEY_CONTRIBUTOR_ID)
            if not org_contributor_field:
                print u'Did not find a contributor ID for Organization: ' + dataset_org_id
                continue

            try:
                org_contributor_id_list = json.loads(
                    org_contributor_field['value'])
            except ValueError:
                # json.loads failed -> value is not an array but a single string
                org_contributor_id_list = [org_contributor_field['value']]

            dataset_contributor_field = get_extras_field(
                dataset, EXTRA_KEY_CONTRIBUTOR_ID)
            requires_update = False
            if not dataset_contributor_field:
                # Contributor-id field does not exist yet
                set_extras_field(dataset, EXTRA_KEY_CONTRIBUTOR_ID,
                                 json.dumps(org_contributor_id_list))
                created_count = created_count + 1
                requires_update = True
            else:
                try:
                    current_ids_list = json.loads(
                        dataset_contributor_field['value'])
                except ValueError:
                    # json.loads failed -> value is not an array but a single string
                    current_ids_list = [dataset_contributor_field['value']]

                for contributor_id in org_contributor_id_list:
                    if contributor_id not in current_ids_list:
                        current_ids_list.append(contributor_id)
                        requires_update = True
                if requires_update:
                    updated_count = updated_count + 1
                    set_extras_field(dataset, EXTRA_KEY_CONTRIBUTOR_ID,
                                     json.dumps(current_ids_list))

            if requires_update:
                self.update_dataset(dataset)

        endtime = time.time()
        print "INFO: A Contributor-ID was created for %s datasets that did not have one before." % \
              created_count
        print "INFO: %s datasets were updated. Total time: %s." % (
            updated_count, str(endtime - starttime))

        util.get_migrator_log().info(
            'Finished migration of dcatde:contributorID' +
            (' [dry run without saving]' if self.dry_run else ''))

    def iterate_datasets(self, package_ids):
        '''
        Helper which iterates over all datasets in package_ids, i.e. fetches the package
        for all IDs
        '''
        package_show = tk.get_action('package_show')

        package_ids_unique = set(package_ids)
        progress_total = len(package_ids_unique)
        util.get_migrator_log().info('INFO migrating ' + str(progress_total) +
                                     ' datasets in total')
        progress_current = 0
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

        for dataset_id in package_ids_unique:
            try:
                # write out status via UDP (see class doc for netcat cmd)
                progress_current += 1
                sock.sendto(
                    str(progress_current) + " / " + str(progress_total) + "\n",
                    (self.UDP_IP, self.UDP_PORT))

                dataset = package_show(self.create_context(),
                                       {'id': dataset_id.strip()})

                # ignore harvesters, which are in the list as well
                if dataset['type'] == 'harvest':
                    continue

                yield dataset

            except Exception:
                util.get_migrator_log().exception("Package '%s' was not found",
                                                  dataset_id)

    def iterate_local_datasets(self):
        '''
        Iterates over all local datasets
        '''
        package_list = tk.get_action('package_list')

        # returns only active datasets (missing datasets with status "private" and "draft")
        package_ids = package_list(self.create_context(), {})
        # Query all private and draft packages except harvest packages
        query = model.Session.query(model.Package)\
            .filter(or_(model.Package.private == True, model.Package.state == 'draft'))\
            .filter(model.Package.type != 'harvest')
        for package_object in query:
            package_ids.append(package_object.id)

        return self.iterate_datasets(package_ids)

    def iterate_adms_id_datasets(self):
        '''
        Iterates over all datasets having an adms:identifier (extras.alternate_identifier) field
        '''
        query = model.Session.query(model.PackageExtra.package_id) \
            .filter(model.PackageExtra.key == EXTRA_KEY_ADMS_IDENTIFIER) \
            .filter(model.PackageExtra.state != 'deleted')
        package_ids = []
        for package_object in query:
            package_ids.append(package_object.package_id)

        return self.iterate_datasets(package_ids)

    def update_dataset(self, dataset):
        '''
        Updates dataset in CKAN.
        '''
        if not self.dry_run:
            try:
                package_update = tk.get_action('package_update')
                ctx = self.create_context()
                ctx['schema'] = self.PACKAGE_UPDATE_SCHEMA
                ctx['return_id_only'] = True
                package_update(ctx, dataset)
            except Exception:
                util.get_migrator_log().exception(
                    util.log_dataset_prefix(dataset) + 'could not update')
Example #27
def update_package_schema():
    schema = default_update_package_schema()
    _modify_schema(schema)
    return schema
Example #28
    def test_1_package_schema(self):
        pkg = model.Session.query(model.Package)\
            .filter_by(name='annakarenina')\
            .first()

        package_id = pkg.id
        result = package_dictize(pkg, self.context)
        self.remove_changable_columns(result)

        result['name'] = 'anna2'
        # we need to remove these as they have been added
        del result['relationships_as_object']
        del result['relationships_as_subject']

        converted_data, errors = validate(result,
                                          default_create_package_schema(),
                                          self.context)

        expected_data = {
            'extras': [{'key': u'genre', 'value': u'romantic novel'},
                       {'key': u'original media', 'value': u'book'}],
            'groups': [{u'name': u'david',
                        u'title': u"Dave's books"},
                       {u'name': u'roger',
                        u'title': u"Roger's books"}],
            'license_id': u'other-open',
            'name': u'anna2',
            'type': u'dataset',
            'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
            'private': False,
            'resources': [{'alt_url': u'alt123',
                           'description': u'Full text. Needs escaping: " Umlaut: \xfc',
                           'format': u'plain text',
                           'hash': u'abc123',
                           'size_extra': u'123',
                           'url': u'http://datahub.io/download/x=1&y=2'},
                          {'alt_url': u'alt345',
                           'description': u'Index of the novel',
                           'format': u'JSON',
                           'hash': u'def456',
                           'size_extra': u'345',
                           'url': u'http://datahub.io/index.json'}],
            'tags': [{'name': u'Flexible \u30a1'},
                     {'name': u'russian'},
                     {'name': u'tolstoy'}],
            'title': u'A Novel By Tolstoy',
            'url': u'http://datahub.io',
            'version': u'0.7a'
        }

        assert converted_data == expected_data, pformat(converted_data)
        assert not errors, errors

        data = converted_data
        data['name'] = u'annakarenina'
        data.pop("title")
        data["resources"][0]["url"] = 'fsdfafasfsaf'
        data["resources"][1].pop("url")

        converted_data, errors = validate(data,
                                          default_create_package_schema(),
                                          self.context)

        assert errors == {
            'name': [u'That URL is already in use.'],
            'resources': [{}, {'url': [u'Missing value']}]
        }, pformat(errors)

        data["id"] = package_id

        converted_data, errors = validate(data,
                                          default_update_package_schema(),
                                          self.context)

        assert errors == {
            'resources': [{}, {'url': [u'Missing value']}]
        }, pformat(errors)

        data['name'] = '????jfaiofjioafjij'

        converted_data, errors = validate(data,
                                          default_update_package_schema(),
                                          self.context)
        assert errors == {
            'name': [u'Must be purely lowercase alphanumeric (ascii) '
                     'characters and these symbols: -_'],
            'resources': [{}, {'url': [u'Missing value']}]
        }, pformat(errors)
Example #29
    def test_package_schema(self):
        group1 = factories.Group(title="Dave's books")
        group2 = factories.Group(title="Roger's books")
        first_name = factories.Dataset.stub().name
        second_name = factories.Dataset.stub().name
        expected_data = {
            "extras": [
                {"key": u"genre", "value": u"romantic novel"},
                {"key": u"original media", "value": u"book"},
            ],
            "groups": [
                {u"name": group1["name"], u"title": group1["title"]},
                {u"name": group2["name"], u"title": group2["title"]},
            ],
            "license_id": u"other-open",
            "name": first_name,
            "type": u"dataset",
            "notes": u"Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n",
            "private": False,
            "resources": [
                {
                    "alt_url": u"alt123",
                    "description": u'Full text. Needs escaping: " Umlaut: \xfc',
                    "format": u"plain text",
                    "hash": u"abc123",
                    "size_extra": u"123",
                    "url": u"http://datahub.io/download/x=1&y=2",
                },
                {
                    "alt_url": u"alt345",
                    "description": u"Index of the novel",
                    "format": u"JSON",
                    "hash": u"def456",
                    "size_extra": u"345",
                    "url": u"http://datahub.io/index.json",
                },
            ],
            "tags": sorted([
                {"name": factories.Tag.stub().name},
                {"name": factories.Tag.stub().name},
                {"name": factories.Tag.stub().name},
            ], key=operator.itemgetter("name")),
            "title": u"A Novel By Tolstoy",
            "url": u"http://datahub.io",
            "version": u"0.7a",
            "relationships_as_subject": [],
            "relationships_as_object": [],
        }

        context = {"model": model, "session": model.Session}
        pkg = factories.Dataset.model(**expected_data)

        package_id = pkg.id
        result = package_dictize(pkg, context)
        self.remove_changable_columns(result)

        result["name"] = second_name
        expected_data["name"] = second_name
        converted_data, errors = validate(
            result, default_create_package_schema(), context
        )

        assert converted_data == expected_data, pformat(converted_data)
        assert not errors, errors

        data = converted_data
        data["name"] = first_name
        data.pop("title")
        data["resources"][0]["url"] = "fsdfafasfsaf"
        data["resources"][1].pop("url")

        converted_data, errors = validate(
            data, default_create_package_schema(), context
        )

        assert errors == {"name": [u"That URL is already in use."]}, pformat(
            errors
        )

        data["id"] = package_id
        data["name"] = "????jfaiofjioafjij"

        converted_data, errors = validate(
            data, default_update_package_schema(), context
        )
        assert errors == {
            "name": [
                u"Must be purely lowercase alphanumeric (ascii) "
                "characters and these symbols: -_"
            ]
        }, pformat(errors)
Example #30
    def test_1_package_schema(self):
        pkg = model.Session.query(model.Package)\
            .filter_by(name='annakarenina')\
            .first()

        package_id = pkg.id
        result = package_dictize(pkg, self.context)
        self.remove_changable_columns(result)

        result['name'] = 'anna2'
        # we need to remove these as they have been added
        del result['relationships_as_object']
        del result['relationships_as_subject']

        converted_data, errors = validate(result,
                                          default_create_package_schema(),
                                          self.context)

        expected_data = {
            'extras': [{
                'key': u'genre',
                'value': u'romantic novel'
            }, {
                'key': u'original media',
                'value': u'book'
            }],
            'groups': [{
                u'name': u'david',
                u'title': u"Dave's books"
            }, {
                u'name': u'roger',
                u'title': u"Roger's books"
            }],
            'license_id':
            u'other-open',
            'name':
            u'anna2',
            'type':
            u'dataset',
            'notes':
            u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
            'private':
            False,
            'resources': [{
                'alt_url':
                u'alt123',
                'description':
                u'Full text. Needs escaping: " Umlaut: \xfc',
                'format':
                u'plain text',
                'hash':
                u'abc123',
                'size_extra':
                u'123',
                'url':
                u'http://www.annakarenina.com/download/x=1&y=2'
            }, {
                'alt_url': u'alt345',
                'description': u'Index of the novel',
                'format': u'JSON',
                'hash': u'def456',
                'size_extra': u'345',
                'url': u'http://www.annakarenina.com/index.json'
            }],
            'tags': [{
                'name': u'Flexible \u30a1'
            }, {
                'name': u'russian'
            }, {
                'name': u'tolstoy'
            }],
            'title':
            u'A Novel By Tolstoy',
            'url':
            u'http://www.annakarenina.com',
            'version':
            u'0.7a'
        }

        assert converted_data == expected_data, pformat(converted_data)
        assert not errors, errors

        data = converted_data
        data['name'] = u'annakarenina'
        data.pop("title")
        data["resources"][0]["url"] = 'fsdfafasfsaf'
        data["resources"][1].pop("url")

        converted_data, errors = validate(data,
                                          default_create_package_schema(),
                                          self.context)

        assert errors == {
            'name': [u'That URL is already in use.'],
            'resources': [{}, {
                'url': [u'Missing value']
            }]
        }, pformat(errors)

        data["id"] = package_id

        converted_data, errors = validate(data,
                                          default_update_package_schema(),
                                          self.context)

        assert errors == {
            'resources': [{}, {
                'url': [u'Missing value']
            }]
        }, pformat(errors)

        data['name'] = '????jfaiofjioafjij'

        converted_data, errors = validate(data,
                                          default_update_package_schema(),
                                          self.context)
        assert errors == {
            'name': [
                u'Url must be purely lowercase alphanumeric (ascii) '
                'characters and these symbols: -_'
            ],
            'resources': [{}, {
                'url': [u'Missing value']
            }]
        }, pformat(errors)
Example #31
 def update_package_schema(self) -> Schema:
     return schema.default_update_package_schema()
Example #32
    def setup(self):
        print("")
        print("TestUM:setup() before each test method")

        # Add sysadmin user
        self.harvestUser = model.User(name=u'harvest',
                                      password=u'test',
                                      sysadmin=True)
        model.Session.add(self.harvestUser)
        model.Session.commit()

        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'xml/sample.xml',
            'source_type': u'ngds'
        }

        context = {
            'model': model,
            'session': model.Session,
            'user': u'harvest'
        }

        if config.get('ckan.harvest.auth.profile') == u'publisher' \
           and not 'publisher_id' in source_fixture:
            source_fixture['publisher_id'] = self.publisher.id

        source_dict = get_action('harvest_source_create')(context,
                                                          source_fixture)
        self.oHarvestSource = HarvestSource.get(source_dict['id'])

        job_dict = get_action('harvest_job_create')(
            context, {
                'source_id': self.oHarvestSource.id
            })
        self.oHarvestJob = HarvestJob.get(job_dict['id'])

        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True,
        }

        data_dict = {
            'guid': 'guid',
            'content': self.contentDataset,
            'job_id': self.oHarvestJob.id,
            'extras': {
                'a key': 'a value'
            },
        }

        oHarvestObject = toolkit.get_action('harvest_object_create')(context,
                                                                     data_dict)
        self.oHarvestObject = HarvestObject.get(oHarvestObject['id'])

        package_schema = default_update_package_schema()
        self.context = {
            'model': model,
            'session': model.Session,
            'user': u'harvest',
            'schema': package_schema,
            'api_version': '2'
        }
Example #33
 def update_package_schema(self):
     schema = default_update_package_schema()
     schema.update({
         'vocab_tags': [ignore_missing, convert_to_tags(TEST_VOCAB_NAME)],
     })
     return schema
Example #34
 def update_package_schema(self):
     return default_schema.default_update_package_schema()
Example #35
    def test_1_package_schema(self):
        pkg = (
            model.Session.query(model.Package)
            .filter_by(name="annakarenina")
            .first()
        )

        package_id = pkg.id
        result = package_dictize(pkg, self.context)
        self.remove_changable_columns(result)

        result["name"] = "anna2"
        # we need to remove these as they have been added
        del result["relationships_as_object"]
        del result["relationships_as_subject"]

        converted_data, errors = validate(
            result, default_create_package_schema(), self.context
        )

        expected_data = {
            "extras": [
                {"key": u"genre", "value": u"romantic novel"},
                {"key": u"original media", "value": u"book"},
            ],
            "groups": [
                {u"name": u"david", u"title": u"Dave's books"},
                {u"name": u"roger", u"title": u"Roger's books"},
            ],
            "license_id": u"other-open",
            "name": u"anna2",
            "type": u"dataset",
            "notes": u"Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n",
            "private": False,
            "resources": [
                {
                    "alt_url": u"alt123",
                    "description": u'Full text. Needs escaping: " Umlaut: \xfc',
                    "format": u"plain text",
                    "hash": u"abc123",
                    "size_extra": u"123",
                    "url": u"http://datahub.io/download/x=1&y=2",
                },
                {
                    "alt_url": u"alt345",
                    "description": u"Index of the novel",
                    "format": u"JSON",
                    "hash": u"def456",
                    "size_extra": u"345",
                    "url": u"http://datahub.io/index.json",
                },
            ],
            "tags": [
                {"name": u"Flexible \u30a1"},
                {"name": u"russian"},
                {"name": u"tolstoy"},
            ],
            "title": u"A Novel By Tolstoy",
            "url": u"http://datahub.io",
            "version": u"0.7a",
        }

        assert converted_data == expected_data, pformat(converted_data)
        assert not errors, errors

        data = converted_data
        data["name"] = u"annakarenina"
        data.pop("title")
        data["resources"][0]["url"] = "fsdfafasfsaf"
        data["resources"][1].pop("url")

        converted_data, errors = validate(
            data, default_create_package_schema(), self.context
        )

        assert errors == {"name": [u"That URL is already in use."]}, pformat(
            errors
        )

        data["id"] = package_id
        data["name"] = "????jfaiofjioafjij"

        converted_data, errors = validate(
            data, default_update_package_schema(), self.context
        )
        assert errors == {
            "name": [
                u"Must be purely lowercase alphanumeric (ascii) "
                "characters and these symbols: -_"
            ]
        }, pformat(errors)
Example #36
 def update_package_schema(self):
     return default_schema.default_update_package_schema()
Example #37
 def update_package_schema(self):
     from ckan.logic.schema import default_update_package_schema
     schema = schema_defs.update_package_schema(
         default_update_package_schema())
     schema = self._modify_package_schema(schema)
     return schema