def update_package_schema(): """ Add our custom fields for validation from the form """ schema = default_update_package_schema() _schema_update(schema, 'update') return schema
def update_package_schema(self):
    schema = default_update_package_schema()
    schema.update({
        'vocab_tags': [ignore_missing, convert_to_tags(TEST_VOCAB_NAME)],
    })
    return schema
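Overrides like the two above are typically wired into CKAN through the IDatasetForm plugin interface. The following is a minimal sketch of that wiring, closely following the pattern in CKAN's plugin tutorial; the extras-backed field name 'custom_text' is a placeholder for illustration, not a field from the snippets above:

import ckan.plugins as plugins
import ckan.plugins.toolkit as toolkit


class ExampleDatasetFormPlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm):
    """Minimal IDatasetForm plugin adding one extras-backed field."""
    plugins.implements(plugins.IDatasetForm)

    def _modify_package_schema(self, schema):
        # 'custom_text' is a hypothetical field used for illustration.
        schema.update({
            'custom_text': [toolkit.get_validator('ignore_missing'),
                            toolkit.get_converter('convert_to_extras')],
        })
        return schema

    def create_package_schema(self):
        schema = super(ExampleDatasetFormPlugin, self).create_package_schema()
        return self._modify_package_schema(schema)

    def update_package_schema(self):
        schema = super(ExampleDatasetFormPlugin, self).update_package_schema()
        return self._modify_package_schema(schema)

    def show_package_schema(self):
        schema = super(ExampleDatasetFormPlugin, self).show_package_schema()
        # Convert the stored extra back into a top-level field on read.
        schema.update({
            'custom_text': [toolkit.get_converter('convert_from_extras'),
                            toolkit.get_validator('ignore_missing')],
        })
        return schema

    def is_fallback(self):
        # Act as the default handler for datasets with no specific type.
        return True

    def package_types(self):
        # An empty list means this plugin handles all dataset types.
        return []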
def form_to_db_schema_options(self, options=None):
    # Avoid a mutable default argument.
    options = options or {}
    context = options.get('context', {})
    schema = context.get('schema', None)
    if schema:
        return schema
    elif options.get('api'):
        if options.get('type') == 'create':
            return default_schema.default_create_package_schema()
        else:
            return default_schema.default_update_package_schema()
    schema = self.form_to_db_schema()
    # Sysadmins can save UKLP datasets with looser validation
    # constraints. This is because UKLP datasets are created using
    # a custom schema passed in from the harvester. However, when it
    # comes to re-saving the dataset via the dataset form, there are
    # some validation requirements we need to drop. That's what this
    # section of code does.
    pkg = context.get('package')
    user = context.get('user', '')
    if Authorizer().is_sysadmin(unicode(user)) and \
            pkg and pkg.extras.get('UKLP', 'False') == 'True':
        schema.update(self._uklp_sysadmin_schema_updates)
    return schema
def project_update_schema():
    schema = default_update_package_schema()
    schema.update(project_schema())
    schema.update({
        '__after': [update_cadasta_project],
    })
    return schema
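The '__after' entry above registers a post-processing validator that navl runs once, after all field-level validators have finished. The real update_cadasta_project is not shown here; the following is only an illustrative sketch of the shape such a validator takes, with a made-up rule in the body:

def update_cadasta_project(key, data, errors, context):
    # A '__after' validator is called once with key == ('__after',);
    # 'data' is the flattened data dict, e.g.
    # {('title',): u'...', ('extras', 0, 'key'): u'...'}.
    # Illustrative rule only: require a title on active projects.
    if data.get(('state',)) == 'active' and not data.get(('title',)):
        errors.setdefault(('title',), []).append(
            'An active project needs a title')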
def form_to_db_schema_options(self, options=None):
    # Avoid a mutable default argument.
    options = options or {}
    context = options.get('context', {})
    schema = context.get('schema', None)
    if schema:
        return schema
    elif options.get('api'):
        if options.get('type') == 'create':
            return default_schema.default_create_package_schema()
        else:
            return default_schema.default_update_package_schema()
    schema = self.form_to_db_schema()
    # Sysadmins can save UKLP datasets with looser validation
    # constraints. This is because UKLP datasets are created using
    # a custom schema passed in from the harvester. However, when it
    # comes to re-saving the dataset via the dataset form, there are
    # some validation requirements we need to drop. That's what this
    # section of code does.
    pkg = context.get('package')
    user = context.get('user', '')
    if Authorizer().is_sysadmin(unicode(user)) and \
            pkg and pkg.extras.get('UKLP', 'False') == 'True':
        schema.update(self._uklp_sysadmin_schema_updates)
    if Authorizer().is_sysadmin(unicode(user)) and \
            pkg and pkg.extras.get('external_reference') == 'ONSHUB':
        self._ons_sysadmin_schema_updates(schema)
    return schema
def setup(self):
    # Add sysadmin user
    harvest_user = model.User(name=u'harvest', password=u'test')
    model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': Session,
        'user': u'harvest',
        'schema': package_schema,
        'api_version': '2',
    }

    if config.get('ckan.harvest.auth.profile') == u'publisher':
        # Create a publisher user
        rev = model.repo.new_revision()
        self.publisher_user = model.User(name=u'test-publisher-user', password=u'test')
        self.publisher = model.Group(name=u'test-publisher',
                                     title=u'Test Publisher',
                                     type=u'publisher')
        Session.add(self.publisher_user)
        Session.add(self.publisher)
        Session.commit()

        member = model.Member(table_name='user',
                              table_id=self.publisher_user.id,
                              group=self.publisher,
                              capacity='admin')
        Session.add(member)
        Session.commit()
def package_update_validate(context, data_dict):
    model = context['model']
    user = context['user']
    id = data_dict["id"]
    schema = context.get('schema') or default_update_package_schema()

    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    check_access('package_update', context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))
    return data
def update_package_schema():
    schema = default_update_package_schema()
    _modify_schema(schema)
    name_validator = get_validator('name_validator')
    schema['name'] = [not_empty, unicode, trim_string(100), name_validator,
                      no_pending_dataset_with_same_name]
    return schema
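Note that trim_string(100) above is a validator factory rather than a plain validator: calling it with a length returns the validator that navl actually invokes. A minimal sketch of that pattern, in Python 2 to match the surrounding code; the real trim_string may behave differently (for example, rejecting over-long values instead of truncating them):

def trim_string(max_length):
    # Validator factory: returns a navl validator bound to max_length.
    def validator(key, data, errors, context):
        value = data.get(key)
        if isinstance(value, basestring) and len(value) > max_length:
            # Illustrative choice: truncate rather than reject.
            data[key] = value[:max_length]
    return validator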
def setup(self):
    print("")
    print("TestUM:setup() before each test method")

    # Add sysadmin user
    self.harvestUser = model.User(name=u'harvest', password=u'test', sysadmin=True)
    model.Session.add(self.harvestUser)
    model.Session.commit()

    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'xml/sample.xml',
        'source_type': u'ngds'
    }

    context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest'
    }

    if config.get('ckan.harvest.auth.profile') == u'publisher' \
            and 'publisher_id' not in source_fixture:
        source_fixture['publisher_id'] = self.publisher.id

    source_dict = get_action('harvest_source_create')(context, source_fixture)
    self.oHarvestSource = HarvestSource.get(source_dict['id'])

    job_dict = get_action('harvest_job_create')(
        context, {'source_id': self.oHarvestSource.id})
    self.oHarvestJob = HarvestJob.get(job_dict['id'])

    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
    }

    data_dict = {
        'guid': 'guid',
        'content': self.contentDataset,
        'job_id': self.oHarvestJob.id,
        'extras': {'a key': 'a value'},
    }

    oHarvestObject = toolkit.get_action('harvest_object_create')(context, data_dict)
    self.oHarvestObject = HarvestObject.get(oHarvestObject['id'])

    package_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
        'schema': package_schema,
        'api_version': '2'
    }
def setup(self):
    # Add sysadmin user
    harvest_user = model.User(name=u'harvest', password=u'test', sysadmin=True)
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': Session,
        'user': u'harvest',
        'schema': package_schema,
        'api_version': '2'
    }
def setup(self):
    super(FisbrokerTestBase, self).setup()
    reset_mock_server()
    # Add sysadmin user
    user_name = u'harvest'
    harvest_user = model.User(name=user_name, password=u'test', sysadmin=True)
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': Session,
        'user': user_name,
        'schema': package_schema,
        'api_version': '2'
    }
def package_update(self, context, data_dict):
    preview = context.get('preview', False)
    schema = context.get('schema') or default_update_package_schema()
    if preview:
        return
    if 'id' not in data_dict:
        raise NotFound
    result = self.engine.execute(
        select(
            [self.package_table.c.nid],
            or_(self.package_table.c.id == data_dict['id'],
                self.package_table.c.name == data_dict['id'])
        )
    ).fetchone()
    if not result:
        raise NotFound
    nid = result['nid']

    data_dict['body'] = data_dict['notes']

    # run through validate to make sure tags are in correct place
    data, errors = validate(data_dict, schema, context)

    terms = {}
    for num, tag in enumerate(data.get('tags', [])):
        terms[str(num)] = tag['name']
    data_dict['terms'] = terms

    if data_dict.get('state', 'active') == 'active':
        data_dict['status'] = '1'
    else:
        data_dict['status'] = '0'

    url = urlparse.urljoin(self.base_url, 'services/package/%s.json' % nid)
    data = json.dumps({'data': data_dict})
    req = urllib2.Request(url, data, {'Content-type': 'application/json'})
    req.get_method = lambda: 'PUT'
    # XXX think about error conditions a bit more
    f = urllib2.urlopen(req, None, 3)
    try:
        drupal_info = json.loads(f.read())
    finally:
        f.close()

    session = context['model'].Session
    context['nid'] = result['nid']
    package_update = update.package_update(context, data_dict)
    package_update['nid'] = result['nid']
    package_update['revision_message'] = '%s-%s' % (
        session.revision.id, session.revision.message)
    return package_update
def validate(self, context, data_dict, schema, action):
    if action in ('package_update', 'package_create'):
        # If the caller to package_update specified a schema (e.g.
        # harvesters specify the default schema) then we don't want to
        # override that.
        if not context.get('schema'):
            if 'api_version' in context:
                # When accessed by the API, just use the default schemas.
                # It's only the forms that are customized to make it easier
                # for humans.
                if action == 'package_create':
                    schema = default_schema.default_create_package_schema()
                else:
                    schema = default_schema.default_update_package_schema()
            else:
                # Customized schema for DGU form
                schema = self.form_to_db_schema_options(context)
    return toolkit.navl_validate(data_dict, schema, context)
def package_update(context, data_dict):
    model = context['model']
    user = context['user']
    id = data_dict["id"]
    preview = context.get('preview', False)
    schema = context.get('schema') or default_update_package_schema()

    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_('Package was not found.'))
    data_dict["id"] = pkg.id

    check_access(pkg, model.Action.EDIT, context)

    data, errors = validate(data_dict, schema, context)

    check_group_auth(context, data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))

    if not preview:
        rev = model.repo.new_revision()
        rev.author = user
        if 'message' in context:
            rev.message = context['message']
        else:
            rev.message = _(u'REST API: Update object %s') % data.get("name")

    pkg = package_dict_save(data, context)

    if not preview:
        for item in PluginImplementations(IPackageController):
            item.edit(pkg)
        model.repo.commit()
        return package_dictize(pkg, context)
    return data
def package_update_schema():
    schema = default_update_package_schema()
    schema.update({
        'frequency_time_modifier': [ignore_missing, unicode, convert_to_extras],
        'frequency_count': [ignore_missing, convert_to_extras],
        'frequency_update_period': [ignore_missing, unicode, convert_to_extras],
        'frequency_period': [ignore_missing, unicode, convert_to_extras],
        # frequency is constructed from the other frequency_ fields
        'frequency': [ignore_missing],
        'retention_count': [ignore_missing, is_positive_integer, convert_to_extras],
        'retention_period': [ignore_missing, unicode, convert_to_extras],
        'delivery_unit': [ignore_missing, unicode, convert_to_extras],
        'service': [ignore_missing, unicode, convert_to_extras],
        'next_update': [ignore_missing, unicode, convert_to_extras],
        'review_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_start_date': [ignore_missing, unicode, convert_to_extras],
        'coverage_end_date': [ignore_missing, unicode, convert_to_extras],
    })
    return schema
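Fields routed through convert_to_extras, as above, are persisted as package extras. To surface them again as top-level keys when a dataset is read, a show schema usually reverses the conversion with convert_from_extras. A minimal sketch of that counterpart, assuming the same field names as above; the function name package_show_schema is illustrative:

def package_show_schema():
    # Counterpart to package_update_schema(): pull the stored extras
    # back out as top-level fields when the dataset is read.
    schema = default_show_package_schema()
    for field in ('frequency_time_modifier', 'frequency_count',
                  'frequency_update_period', 'frequency_period',
                  'retention_count', 'retention_period',
                  'delivery_unit', 'service', 'next_update',
                  'review_date', 'coverage_start_date', 'coverage_end_date'):
        schema[field] = [convert_from_extras, ignore_missing]
    return schema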
def package_update(context, data_dict):
    model = context["model"]
    user = context["user"]
    id = data_dict["id"]
    schema = context.get("schema") or default_update_package_schema()

    model.Session.remove()
    model.Session()._context = context

    pkg = model.Package.get(id)
    context["package"] = pkg

    if pkg is None:
        raise NotFound(_("Package was not found."))
    data_dict["id"] = pkg.id

    check_access("package_update", context, data_dict)

    data, errors = validate(data_dict, schema, context)
    if errors:
        model.Session.rollback()
        raise ValidationError(errors, package_error_summary(errors))

    rev = model.repo.new_revision()
    rev.author = user
    if "message" in context:
        rev.message = context["message"]
    else:
        rev.message = _(u"REST API: Update object %s") % data.get("name")

    pkg = package_dict_save(data, context)

    for item in PluginImplementations(IPackageController):
        item.edit(pkg)
    if not context.get("defer_commit"):
        model.repo.commit()
    return package_dictize(pkg, context)
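The context.get("schema") or default_update_package_schema() pattern in the actions above is what lets callers such as harvesters substitute their own validation, as several of the test setups in this collection do. A minimal sketch of that calling convention against a registered CKAN action; the user name and dataset id are placeholders:

from ckan import model
from ckan.logic import get_action
from ckan.logic.schema import default_update_package_schema

# Supply an explicit schema so the action bypasses any form-customised one.
context = {
    'model': model,
    'session': model.Session,
    'user': 'harvest',           # placeholder user name
    'schema': default_update_package_schema(),
}
updated = get_action('package_update')(context, {
    'id': 'example-dataset',     # placeholder dataset id
    'notes': 'Updated by a harvester run.',
})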
def test_clean_tags(self):
    # Create source
    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
        'source_type': u'gemini-single',
        'owner_org': 'test-org',
        'metadata_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'metadata_modified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }

    user = User.get('dummy')
    if not user:
        user = call_action('user_create',
                           name='dummy',
                           password='******',
                           email='*****@*****.**')
        user_name = user['name']
    else:
        user_name = user.name
    org = Group.by_name('test-org')
    if org is None:
        org = call_action('organization_create',
                          context={'user': user_name},
                          name='test-org')
    existing_g = Group.by_name('existing-group')
    if existing_g is None:
        existing_g = call_action('group_create',
                                 context={'user': user_name},
                                 name='existing-group')

    context = {'user': '******'}
    package_schema = default_update_package_schema()
    context['schema'] = package_schema
    package_dict = {
        'frequency': 'manual',
        'publisher_name': 'dummy',
        'extras': [{'key': 'theme', 'value': ['non-mappable', 'thememap1']}],
        'groups': [],
        'title': 'fakename',
        'holder_name': 'dummy',
        'holder_identifier': 'dummy',
        'name': 'fakename',
        'notes': 'dummy',
        'owner_org': 'test-org',
        'modified': datetime.now(),
        'publisher_identifier': 'dummy',
        'metadata_created': datetime.now(),
        'metadata_modified': datetime.now(),
        'guid': unicode(uuid4()),
        'identifier': 'dummy',
    }
    package_data = call_action('package_create', context=context, **package_dict)

    package = Package.get('fakename')
    source, job = self._create_source_and_job(source_fixture)
    job.package = package
    job.guid = uuid4()
    harvester = SpatialHarvester()
    with open(os.path.join('..', 'data', 'dataset.json')) as f:
        dataset = json.load(f)

    # long tags are invalid in all cases
    TAG_LONG_INVALID = 'abcdefghij' * 20
    # if clean_tags is not set to true, tags will be truncated to 50 chars
    TAG_LONG_VALID = TAG_LONG_INVALID[:50]
    # default truncate to 100
    TAG_LONG_VALID_LONG = TAG_LONG_INVALID[:100]

    assert len(TAG_LONG_VALID) == 50
    assert TAG_LONG_VALID[-1] == 'j'

    TAG_CHARS_INVALID = '[email protected]!'
    TAG_CHARS_VALID = 'pretty-invlidtag'

    dataset['tags'].append(TAG_LONG_INVALID)
    dataset['tags'].append(TAG_CHARS_INVALID)

    harvester.source_config = {'clean_tags': False}
    out = harvester.get_package_dict(dataset, job)
    tags = out['tags']

    # no clean tags, so invalid chars are in
    # but tags are truncated to 50 chars
    assert {'name': TAG_CHARS_VALID} not in tags
    assert {'name': TAG_CHARS_INVALID} in tags
    assert {'name': TAG_LONG_VALID_LONG} in tags
    assert {'name': TAG_LONG_INVALID} not in tags

    harvester.source_config = {'clean_tags': True}
    out = harvester.get_package_dict(dataset, job)
    tags = out['tags']
    assert {'name': TAG_CHARS_VALID} in tags
    assert {'name': TAG_LONG_VALID_LONG} in tags
class DCATdeMigrateCommand(tk.CkanCommand):
    '''
    Migrates CKAN datasets from OGD to DCAT-AP.de.

    Usage: dcatde_migrate [dry-run] [adms-id-migrate|contributor-id-migrate]
    Params:
        dry-run                 If given, perform all migration tasks without saving.
                                A full log file is written.

        adms-id-migrate         If given, only migrate adms:identifier to
                                dct:identifier for all affected datasets.

        contributor-id-migrate  If given, set a contributor-ID for all datasets
                                without an ID.

    Connect with "nc -ul 5005" on the same machine to receive status updates.
    '''

    summary = __doc__.split('\n')[0]
    usage = __doc__

    UDP_IP = "127.0.0.1"
    UDP_PORT = 5005

    # constants for different migration modes
    MODE_OGD = 0
    MODE_ADMS_ID = 1
    MODE_CONTRIBUTOR_ID = 2

    dry_run = False
    migration_mode = MODE_OGD

    PACKAGE_UPDATE_SCHEMA = schema_.default_update_package_schema()

    def __init__(self, name):
        super(DCATdeMigrateCommand, self).__init__(name)
        self.executor = None  # initialized after config load
        try:
            email_validator = tk.get_validator('email_validator')
            self.PACKAGE_UPDATE_SCHEMA['maintainer_email'].remove(email_validator)
            self.PACKAGE_UPDATE_SCHEMA['author_email'].remove(email_validator)
        except (ValueError, UnknownValidator):
            pass

    def create_context(self):
        ''' Creates new context. '''
        return {'model': model, 'ignore_auth': True}

    def command(self):
        ''' Executes command. '''
        for cmd in self.args:
            if cmd == 'dry-run':
                self.dry_run = True
            elif cmd == 'adms-id-migrate':
                self.migration_mode = self.MODE_ADMS_ID
            elif cmd == 'contributor-id-migrate':
                self.migration_mode = self.MODE_CONTRIBUTOR_ID
            else:
                print 'Command %s not recognized' % cmd
                self.parser.print_usage()
                sys.exit(1)

        self._load_config()
        if self.migration_mode == self.MODE_ADMS_ID:
            self.migrate_adms_identifier()
        elif self.migration_mode == self.MODE_CONTRIBUTOR_ID:
            self.migrate_contributor_identifier()
        else:
            self.executor = migration_functions.MigrationFunctionExecutor(
                pylons.config.get('ckanext.dcatde.urls.license_mapping'),
                pylons.config.get('ckanext.dcatde.urls.category_mapping'))
            self.migrate_datasets()

    def migrate_datasets(self):
        ''' Iterates over all datasets and migrates fields with 'migration_functions' '''
        # Check if all needed groups are present
        group_list = tk.get_action('group_list')
        if not self.executor.check_group_presence(
                group_list(self.create_context(), {})):
            return

        util.get_migrator_log().info(
            'Starting dataset migration' +
            (' [dry run without saving]' if self.dry_run else ''))

        # Change the type of all datasets to 'dataset' via DB query, as
        # package_update() doesn't allow setting the type
        if not self.dry_run:
            model.Session.query(model.Package)\
                .filter(or_((model.Package.type == "datensatz"),
                            (model.Package.type == "app"),
                            (model.Package.type == "dokument")))\
                .update({"type": u'dataset'})
            model.repo.commit()

        for dataset in self.iterate_local_datasets():
            self.executor.apply_to(dataset)
            self.update_dataset(dataset)

        util.get_migrator_log().info(
            'Dataset migration finished' +
            (' [dry run, did not save]' if self.dry_run else ''))

    def migrate_adms_identifier(self):
        util.get_migrator_log().info(
            'Migrating adms:identifier to dct:identifier' +
            (' [dry run without saving]' if self.dry_run else ''))

        for dataset in self.iterate_adms_id_datasets():
            # only migrate if dct:identifier is not already present
            if not dataset_utils.get_extras_field(dataset, EXTRA_KEY_DCT_IDENTIFIER):
                util.rename_extras_field_migration(
                    dataset, EXTRA_KEY_ADMS_IDENTIFIER,
                    EXTRA_KEY_DCT_IDENTIFIER, False)
                self.update_dataset(dataset)
            else:
                util.get_migrator_log().info(
                    '%sSkipping package as it already has a dct:identifier',
                    util.log_dataset_prefix(dataset))

        util.get_migrator_log().info(
            'Finished migration of adms:identifier to dct:identifier' +
            (' [dry run without saving]' if self.dry_run else ''))

    def migrate_contributor_identifier(self):
        ''' Add govdata-contributor-IDs to datasets that are missing one '''
        util.get_migrator_log().info(
            'Migrating dcatde:contributorID' +
            (' [dry run without saving]' if self.dry_run else ''))

        starttime = time.time()
        package_obj_to_update = gather_dataset_ids()
        endtime = time.time()
        print "INFO: %s datasets found to check for contributor-ID. Total time: %s." % \
            (len(package_obj_to_update), str(endtime - starttime))

        organization_list = tk.get_action('organization_list')(
            self.create_context(), {'all_fields': True, 'include_extras': True})
        updated_count = created_count = 0

        starttime = time.time()
        for dataset in self.iterate_datasets(package_obj_to_update.keys()):
            print u'Updating dataset: {}'.format(dataset['title'])

            dataset_org_id = dataset['organization']['id']
            dataset_org = next((item for item in organization_list
                                if item['id'] == dataset_org_id), None)
            if not dataset_org:
                print u'Did not find an Organization for ID: ' + dataset_org_id
                continue

            org_contributor_field = get_extras_field(dataset_org,
                                                     EXTRA_KEY_CONTRIBUTOR_ID)
            if not org_contributor_field:
                print u'Did not find a contributor ID for Organization: ' + dataset_org_id
                continue

            try:
                org_contributor_id_list = json.loads(org_contributor_field['value'])
            except ValueError:
                # json.loads failed -> value is not an array but a single string
                org_contributor_id_list = [org_contributor_field['value']]

            dataset_contributor_field = get_extras_field(dataset,
                                                         EXTRA_KEY_CONTRIBUTOR_ID)
            requires_update = False
            if not dataset_contributor_field:
                # Contributor-id field does not exist yet
                set_extras_field(dataset, EXTRA_KEY_CONTRIBUTOR_ID,
                                 json.dumps(org_contributor_id_list))
                created_count = created_count + 1
                requires_update = True
            else:
                try:
                    current_ids_list = json.loads(dataset_contributor_field['value'])
                except ValueError:
                    # json.loads failed -> value is not an array but a single string
                    current_ids_list = [dataset_contributor_field['value']]
                for contributor_id in org_contributor_id_list:
                    if contributor_id not in current_ids_list:
                        current_ids_list.append(contributor_id)
                        requires_update = True
                if requires_update:
                    updated_count = updated_count + 1
                    set_extras_field(dataset, EXTRA_KEY_CONTRIBUTOR_ID,
                                     json.dumps(current_ids_list))

            if requires_update:
                self.update_dataset(dataset)

        endtime = time.time()
        print "INFO: A Contributor-ID was created for %s datasets that did not have one before." % \
            created_count
        print "INFO: %s datasets were updated. Total time: %s." % \
            (updated_count, str(endtime - starttime))

        util.get_migrator_log().info(
            'Finished migration of dcatde:contributorID' +
            (' [dry run without saving]' if self.dry_run else ''))

    def iterate_datasets(self, package_ids):
        '''
        Helper which iterates over all datasets in package_ids, i.e.
        fetches the package for all IDs
        '''
        package_show = tk.get_action('package_show')

        package_ids_unique = set(package_ids)
        progress_total = len(package_ids_unique)
        util.get_migrator_log().info('INFO migrating ' + str(progress_total) +
                                     ' datasets in total')
        progress_current = 0
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

        for dataset_id in package_ids_unique:
            try:
                # write out status via UDP (see class doc for netcat cmd)
                progress_current += 1
                sock.sendto(str(progress_current) + " / " + str(progress_total) + "\n",
                            (self.UDP_IP, self.UDP_PORT))

                dataset = package_show(self.create_context(),
                                       {'id': dataset_id.strip()})
                # ignore harvesters, which are in the list as well
                if dataset['type'] == 'harvest':
                    continue

                yield dataset
            except Exception:
                util.get_migrator_log().exception("Package '%s' was not found",
                                                  dataset_id)

    def iterate_local_datasets(self):
        ''' Iterates over all local datasets '''
        package_list = tk.get_action('package_list')

        # returns only active datasets (missing datasets with status "private" and "draft")
        package_ids = package_list(self.create_context(), {})
        # Query all private and draft packages except harvest packages
        query = model.Session.query(model.Package)\
            .filter(or_(model.Package.private == True,
                        model.Package.state == 'draft'))\
            .filter(model.Package.type != 'harvest')
        for package_object in query:
            package_ids.append(package_object.id)

        return self.iterate_datasets(package_ids)

    def iterate_adms_id_datasets(self):
        ''' Iterates over all datasets having an adms:identifier (extras.alternate_identifier) field '''
        query = model.Session.query(model.PackageExtra.package_id) \
            .filter(model.PackageExtra.key == EXTRA_KEY_ADMS_IDENTIFIER) \
            .filter(model.PackageExtra.state != 'deleted')
        package_ids = []
        for package_object in query:
            package_ids.append(package_object.package_id)

        return self.iterate_datasets(package_ids)

    def update_dataset(self, dataset):
        ''' Updates dataset in CKAN. '''
        if not self.dry_run:
            try:
                package_update = tk.get_action('package_update')
                ctx = self.create_context()
                ctx['schema'] = self.PACKAGE_UPDATE_SCHEMA
                ctx['return_id_only'] = True
                package_update(ctx, dataset)
            except Exception:
                util.get_migrator_log().exception(
                    util.log_dataset_prefix(dataset) + 'could not update')
def update_package_schema():
    schema = default_update_package_schema()
    _modify_schema(schema)
    return schema
def test_1_package_schema(self):
    pkg = model.Session.query(model.Package)\
        .filter_by(name='annakarenina')\
        .first()
    package_id = pkg.id
    result = package_dictize(pkg, self.context)
    self.remove_changable_columns(result)

    result['name'] = 'anna2'
    # we need to remove these as they have been added
    del result['relationships_as_object']
    del result['relationships_as_subject']

    converted_data, errors = validate(result,
                                      default_create_package_schema(),
                                      self.context)

    expected_data = {
        'extras': [{'key': u'genre', 'value': u'romantic novel'},
                   {'key': u'original media', 'value': u'book'}],
        'groups': [{u'name': u'david', u'title': u"Dave's books"},
                   {u'name': u'roger', u'title': u"Roger's books"}],
        'license_id': u'other-open',
        'name': u'anna2',
        'type': u'dataset',
        'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
        'private': False,
        'resources': [{'alt_url': u'alt123',
                       'description': u'Full text. Needs escaping: " Umlaut: \xfc',
                       'format': u'plain text',
                       'hash': u'abc123',
                       'size_extra': u'123',
                       'url': u'http://datahub.io/download/x=1&y=2'},
                      {'alt_url': u'alt345',
                       'description': u'Index of the novel',
                       'format': u'JSON',
                       'hash': u'def456',
                       'size_extra': u'345',
                       'url': u'http://datahub.io/index.json'}],
        'tags': [{'name': u'Flexible \u30a1'},
                 {'name': u'russian'},
                 {'name': u'tolstoy'}],
        'title': u'A Novel By Tolstoy',
        'url': u'http://datahub.io',
        'version': u'0.7a'
    }

    assert converted_data == expected_data, pformat(converted_data)
    assert not errors, errors

    data = converted_data
    data['name'] = u'annakarenina'
    data.pop("title")
    data["resources"][0]["url"] = 'fsdfafasfsaf'
    data["resources"][1].pop("url")

    converted_data, errors = validate(data,
                                      default_create_package_schema(),
                                      self.context)

    assert errors == {
        'name': [u'That URL is already in use.'],
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)

    data["id"] = package_id

    converted_data, errors = validate(data,
                                      default_update_package_schema(),
                                      self.context)

    assert errors == {
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)

    data['name'] = '????jfaiofjioafjij'

    converted_data, errors = validate(data,
                                      default_update_package_schema(),
                                      self.context)

    assert errors == {
        'name': [u'Must be purely lowercase alphanumeric (ascii) '
                 'characters and these symbols: -_'],
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)
def test_package_schema(self):
    group1 = factories.Group(title="Dave's books")
    group2 = factories.Group(title="Roger's books")
    first_name = factories.Dataset.stub().name
    second_name = factories.Dataset.stub().name
    expected_data = {
        "extras": [
            {"key": u"genre", "value": u"romantic novel"},
            {"key": u"original media", "value": u"book"},
        ],
        "groups": [
            {u"name": group1["name"], u"title": group1["title"]},
            {u"name": group2["name"], u"title": group2["title"]},
        ],
        "license_id": u"other-open",
        "name": first_name,
        "type": u"dataset",
        "notes": u"Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n",
        "private": False,
        "resources": [
            {
                "alt_url": u"alt123",
                "description": u'Full text. Needs escaping: " Umlaut: \xfc',
                "format": u"plain text",
                "hash": u"abc123",
                "size_extra": u"123",
                "url": u"http://datahub.io/download/x=1&y=2",
            },
            {
                "alt_url": u"alt345",
                "description": u"Index of the novel",
                "format": u"JSON",
                "hash": u"def456",
                "size_extra": u"345",
                "url": u"http://datahub.io/index.json",
            },
        ],
        "tags": sorted(
            [
                {"name": factories.Tag.stub().name},
                {"name": factories.Tag.stub().name},
                {"name": factories.Tag.stub().name},
            ],
            key=operator.itemgetter("name"),
        ),
        "title": u"A Novel By Tolstoy",
        "url": u"http://datahub.io",
        "version": u"0.7a",
        "relationships_as_subject": [],
        "relationships_as_object": [],
    }
    context = {"model": model, "session": model.Session}
    pkg = factories.Dataset.model(**expected_data)
    package_id = pkg.id
    result = package_dictize(pkg, context)
    self.remove_changable_columns(result)

    result["name"] = second_name
    expected_data["name"] = second_name

    converted_data, errors = validate(
        result, default_create_package_schema(), context
    )
    assert converted_data == expected_data, pformat(converted_data)
    assert not errors, errors

    data = converted_data
    data["name"] = first_name
    data.pop("title")
    data["resources"][0]["url"] = "fsdfafasfsaf"
    data["resources"][1].pop("url")

    converted_data, errors = validate(
        data, default_create_package_schema(), context
    )
    assert errors == {"name": [u"That URL is already in use."]}, pformat(errors)

    data["id"] = package_id
    data["name"] = "????jfaiofjioafjij"

    converted_data, errors = validate(
        data, default_update_package_schema(), context
    )
    assert errors == {
        "name": [
            u"Must be purely lowercase alphanumeric (ascii) "
            "characters and these symbols: -_"
        ]
    }, pformat(errors)
def test_1_package_schema(self):
    pkg = model.Session.query(model.Package)\
        .filter_by(name='annakarenina')\
        .first()
    package_id = pkg.id
    result = package_dictize(pkg, self.context)
    self.remove_changable_columns(result)

    result['name'] = 'anna2'
    # we need to remove these as they have been added
    del result['relationships_as_object']
    del result['relationships_as_subject']

    converted_data, errors = validate(result,
                                      default_create_package_schema(),
                                      self.context)

    expected_data = {
        'extras': [{'key': u'genre', 'value': u'romantic novel'},
                   {'key': u'original media', 'value': u'book'}],
        'groups': [{u'name': u'david', u'title': u"Dave's books"},
                   {u'name': u'roger', u'title': u"Roger's books"}],
        'license_id': u'other-open',
        'name': u'anna2',
        'type': u'dataset',
        'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
        'private': False,
        'resources': [{'alt_url': u'alt123',
                       'description': u'Full text. Needs escaping: " Umlaut: \xfc',
                       'format': u'plain text',
                       'hash': u'abc123',
                       'size_extra': u'123',
                       'url': u'http://www.annakarenina.com/download/x=1&y=2'},
                      {'alt_url': u'alt345',
                       'description': u'Index of the novel',
                       'format': u'JSON',
                       'hash': u'def456',
                       'size_extra': u'345',
                       'url': u'http://www.annakarenina.com/index.json'}],
        'tags': [{'name': u'Flexible \u30a1'},
                 {'name': u'russian'},
                 {'name': u'tolstoy'}],
        'title': u'A Novel By Tolstoy',
        'url': u'http://www.annakarenina.com',
        'version': u'0.7a'
    }

    assert converted_data == expected_data, pformat(converted_data)
    assert not errors, errors

    data = converted_data
    data['name'] = u'annakarenina'
    data.pop("title")
    data["resources"][0]["url"] = 'fsdfafasfsaf'
    data["resources"][1].pop("url")

    converted_data, errors = validate(data,
                                      default_create_package_schema(),
                                      self.context)

    assert errors == {
        'name': [u'That URL is already in use.'],
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)

    data["id"] = package_id

    converted_data, errors = validate(data,
                                      default_update_package_schema(),
                                      self.context)

    assert errors == {
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)

    data['name'] = '????jfaiofjioafjij'

    converted_data, errors = validate(data,
                                      default_update_package_schema(),
                                      self.context)

    assert errors == {
        'name': [u'Url must be purely lowercase alphanumeric (ascii) '
                 'characters and these symbols: -_'],
        'resources': [{}, {'url': [u'Missing value']}]
    }, pformat(errors)
def update_package_schema(self) -> Schema:
    return schema.default_update_package_schema()
def update_package_schema(self):
    return default_schema.default_update_package_schema()
def test_1_package_schema(self):
    pkg = (
        model.Session.query(model.Package)
        .filter_by(name="annakarenina")
        .first()
    )
    package_id = pkg.id
    result = package_dictize(pkg, self.context)
    self.remove_changable_columns(result)

    result["name"] = "anna2"
    # we need to remove these as they have been added
    del result["relationships_as_object"]
    del result["relationships_as_subject"]

    converted_data, errors = validate(
        result, default_create_package_schema(), self.context
    )

    expected_data = {
        "extras": [
            {"key": u"genre", "value": u"romantic novel"},
            {"key": u"original media", "value": u"book"},
        ],
        "groups": [
            {u"name": u"david", u"title": u"Dave's books"},
            {u"name": u"roger", u"title": u"Roger's books"},
        ],
        "license_id": u"other-open",
        "name": u"anna2",
        "type": u"dataset",
        "notes": u"Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n",
        "private": False,
        "resources": [
            {
                "alt_url": u"alt123",
                "description": u'Full text. Needs escaping: " Umlaut: \xfc',
                "format": u"plain text",
                "hash": u"abc123",
                "size_extra": u"123",
                "url": u"http://datahub.io/download/x=1&y=2",
            },
            {
                "alt_url": u"alt345",
                "description": u"Index of the novel",
                "format": u"JSON",
                "hash": u"def456",
                "size_extra": u"345",
                "url": u"http://datahub.io/index.json",
            },
        ],
        "tags": [
            {"name": u"Flexible \u30a1"},
            {"name": u"russian"},
            {"name": u"tolstoy"},
        ],
        "title": u"A Novel By Tolstoy",
        "url": u"http://datahub.io",
        "version": u"0.7a",
    }

    assert converted_data == expected_data, pformat(converted_data)
    assert not errors, errors

    data = converted_data
    data["name"] = u"annakarenina"
    data.pop("title")
    data["resources"][0]["url"] = "fsdfafasfsaf"
    data["resources"][1].pop("url")

    converted_data, errors = validate(
        data, default_create_package_schema(), self.context
    )
    assert errors == {"name": [u"That URL is already in use."]}, pformat(errors)

    data["id"] = package_id
    data["name"] = "????jfaiofjioafjij"

    converted_data, errors = validate(
        data, default_update_package_schema(), self.context
    )
    assert errors == {
        "name": [
            u"Must be purely lowercase alphanumeric (ascii) "
            "characters and these symbols: -_"
        ]
    }, pformat(errors)
def update_package_schema(self):
    from ckan.logic.schema import default_update_package_schema
    schema = schema_defs.update_package_schema(
        default_update_package_schema())
    schema = self._modify_package_schema(schema)
    return schema