def portal_update_worker(self, source):
    """
    Worker process: read package ids from stdin, fetch each package
    via package_show on the remote CKAN instance at *source* and
    compare it to the local copy of the same package.  The local
    package is then created, updated, deleted or left unchanged, and
    the action taken is written to stdout as one of 'created',
    'updated', 'deleted' or 'unchanged'.
    """
    registry = RemoteCKAN(source)
    portal = LocalCKAN()
    now = datetime.now()

    # iter(readline, '') terminates when stdin is closed (readline
    # returns the empty string at EOF)
    for package_id in iter(sys.stdin.readline, ''):
        try:
            data = registry.action.package_show(id=package_id.strip())
            source_pkg = data['result']
        except NotAuthorized:
            # remote package is private/deleted: treat as absent
            source_pkg = None

        # NOTE(review): called with None when the fetch failed —
        # assumes _trim_package tolerates None; confirm
        _trim_package(source_pkg)

        if source_pkg:
            # treat unpublished packages same as deleted packages
            if not source_pkg['portal_release_date'] or isodate(
                    source_pkg['portal_release_date'], None) > now:
                source_pkg = None

        try:
            # don't pass user in context so deleted packages
            # raise NotAuthorized
            target_pkg = portal.call_action('package_show',
                {'id': package_id.strip()}, {})
        except (NotFound, NotAuthorized):
            target_pkg = None

        _trim_package(target_pkg)

        if target_pkg is None and source_pkg is None:
            result = 'unchanged'
        elif target_pkg is None:
            # CREATE
            portal.action.package_create(**source_pkg)
            result = 'created'
        elif source_pkg is None:
            # DELETE
            portal.action.package_delete(id=package_id.strip())
            result = 'deleted'
        elif source_pkg == target_pkg:
            result = 'unchanged'
        else:
            # UPDATE
            portal.action.package_update(**source_pkg)
            result = 'updated'

        sys.stdout.write(result + '\n')
        try:
            sys.stdout.flush()
        except IOError:
            # parent closed the pipe; stop this worker cleanly
            break
def setup(self):
    """
    Build the fixtures shared by the tests: a sysadmin, a normal user
    added to a bilingual-titled organization as an editor, action
    callers for each, plus two package dicts — one missing required
    fields and one complete.
    """
    self.sysadmin_user = factories.Sysadmin()
    self.normal_user = factories.User()
    # title carries both languages separated by ' | '
    self.org = factories.Organization(
        title='en org name | fr org name')
    self.sysadmin_action = LocalCKAN(
        username=self.sysadmin_user['name']).action
    self.normal_action = LocalCKAN(
        username=self.normal_user['name']).action
    # anonymous/system action caller
    self.action = LocalCKAN().action

    self.sysadmin_action.organization_member_create(
        username=self.normal_user['name'],
        id=self.org['name'],
        role='editor')

    # deliberately missing required fields (owner_org, french title,
    # frequency, notes, subject, date_published, keywords)
    self.incomplete_pkg = {
        'type': 'dataset',
        'collection': 'primary',
        'title_translated': {'en': u'A Novel By Tolstoy'},
        'license_id': 'ca-ogl-lgo',
        'ready_to_publish': 'true',
        'imso_approval': 'true',
        'jurisdiction': 'federal',
        'maintainer_email': '*****@*****.**',
        'restrictions': 'unrestricted',
        'resources': [{
            'name_translated': {'en': u'Full text.', 'fr': u'Full text.'},
            'format': u'TXT',
            'url': u'http://www.annakarenina.com/download/',
            'size': 42,
            'resource_type': 'dataset',
            'language': ['zxx'],
        }],
    }

    # the incomplete package plus every remaining required field
    self.complete_pkg = dict(self.incomplete_pkg,
        owner_org=self.org['name'],
        title_translated={
            'en': u'A Novel By Tolstoy', 'fr': u'Un novel par Tolstoy'},
        frequency=u'as_needed',
        notes_translated={'en': u'...', 'fr': u'...'},
        subject=[u'persons'],
        date_published=u'2013-01-01',
        keywords={'en': [u'book'], 'fr': [u'livre']},
    )
def publish(self):
    """
    Set the portal release date on every dataset selected in the
    publishing form, then return to the publishing search page.
    """
    lc = LocalCKAN(username=c.user)
    release_date = date_str_to_datetime(
        request.str_POST['publish_date']).strftime("%Y-%m-%d %H:%M:%S")

    # the form posts one 'publish' key per selected dataset id
    for field_name, dataset_id in request.str_POST.iteritems():
        if field_name != 'publish':
            continue
        current = lc.action.package_show(id=dataset_id)
        lc.call_action(
            'package_update',
            dict(current, portal_release_date=release_date))

    # return us to the publishing interface
    redirect(h.url_for(
        controller='ckanext.canada.controller:PublishController',
        action='search'))
def test_invalid_url(self):
    """A resource URL with an invalid character must fail validation
    with the expected message."""
    lc = LocalCKAN()
    raised = False
    try:
        dataset = factories.Dataset()
        payload = {
            'package_id': dataset['name'],
            'name': 'Test-File',
            'url': 'https://example.com]',
        }
        lc.call_action('resource_create', payload)
    except ValidationError as e:
        raised = True
        eq_(
            e.error_dict['url'],
            [u'Please provide a valid URL']
        )
    if not raised:
        raise AssertionError('ValidationError not raised')
def _load_one_csv_file(self, name):
    """
    Load one PD CSV file into the matching datastore resource of each
    organization's recombinant dataset.

    The resource name is derived from the CSV file name (without the
    .csv extension).  Returns 0 on success, 1 on any lookup failure
    (dataset missing, duplicated, or resource missing).
    """
    path, csv_name = os.path.split(name)
    assert csv_name.endswith('.csv'), csv_name
    resource_name = csv_name[:-4]
    print resource_name
    chromo = get_chromo(resource_name)
    dataset_type = chromo['dataset_type']
    # a primary key allows upsert; otherwise records are plain inserts
    method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
    lc = LocalCKAN()

    for org_name, records in csv_data_batch(name, chromo):
        # rows=2 so a duplicate dataset can be detected
        results = lc.action.package_search(
            q='type:%s organization:%s' % (dataset_type, org_name),
            rows=2)['results']
        if not results:
            print 'type:%s organization:%s not found!' % (
                dataset_type, org_name)
            return 1
        if len(results) > 1:
            print 'type:%s organization:%s multiple found!' % (
                dataset_type, org_name)
            return 1
        # find the resource matching the CSV; for/else runs when no
        # resource matched, and `r` holds the match after break
        for r in results[0]['resources']:
            if r['name'] == resource_name:
                break
        else:
            print 'type:%s organization:%s missing resource:%s' % (
                dataset_type, org_name, resource_name)
            return 1
        print '-', org_name, len(records)
        lc.action.datastore_upsert(
            method=method,
            resource_id=r['id'],
            records=records)
    return 0
def rebuild_external_search(self):
    """Rebuild the external search index with the configured options."""
    ckan = LocalCKAN()
    search_integration.rebuild_search_index(
        ckan,
        self.options.unindexed_only,
        self.options.refresh_index,
    )
def _portal_update(self, portal_ini, activity_date):
    """
    Push datasets changed on the registry since *activity_date* to the
    portal configured by *portal_ini*, using a pool of 'copy-datasets'
    worker subprocesses.

    :param portal_ini: path to the portal CKAN config, passed to each
        worker with -c
    :param activity_date: either a relative "past" expression matched
        by PAST_RE (days/hours/minutes ago) or an ISO date string;
        defaults to 7 days ago when empty
    """
    if activity_date:
        past = re.match(PAST_RE, activity_date)
        if past:
            # relative form: unmatched groups default to 0
            days, hours, minutes = (
                int(x) if x else 0 for x in past.groups()
            )
            activity_date = datetime.now() - timedelta(
                days=days,
                seconds=(hours * 60 + minutes) * 60
            )
        else:
            activity_date = isodate(activity_date, None)
    else:
        activity_date = datetime.now() - timedelta(days=7)

    log = None
    if self.options.log:
        log = open(self.options.log, 'a')

    registry = LocalCKAN()

    def changed_package_id_runs(start_date):
        # yield (packages, next_date) batches until the registry
        # reports no further changes
        while True:
            packages, next_date = self._changed_packages_since(
                registry, start_date)
            if next_date is None:
                return
            yield packages, next_date
            start_date = next_date

    cmd = [
        sys.argv[0],
        'canada',
        'copy-datasets',
        '-c',
        portal_ini
    ]
    if self.options.mirror:
        cmd.append('-m')

    pool = worker_pool(
        cmd,
        self.options.processes,
        [],
        stop_when_jobs_done=False,
        stop_on_keyboard_interrupt=False,
    )
    # Advance generator so we may call send() below
    pool.next()

    def append_log(finished, package_id, action, reason):
        # one JSON list per line in the activity log, flushed so the
        # log is usable while the update is still running
        if not log:
            return
        log.write(json.dumps([
            datetime.now().isoformat(),
            finished,
            package_id,
            action,
            reason,
        ]) + '\n')
        log.flush()

    # suppress noisy EPIPE tracebacks on interrupt
    with _quiet_int_pipe():
        append_log(
            None,
            None,
            "started updating from:",
            activity_date.isoformat()
        )

        for packages, next_date in (
                changed_package_id_runs(activity_date)):
            job_ids, finished, result = pool.send(enumerate(packages))
            stats = completion_stats(self.options.processes)
            # drain worker results for this batch
            while result is not None:
                package_id, action, reason = json.loads(result)
                print job_ids, stats.next(), finished, package_id, \
                    action, reason
                append_log(finished, package_id, action, reason)
                job_ids, finished, result = pool.next()

            print " --- next batch starting at: " + next_date.isoformat()
            append_log(
                None,
                None,
                "next batch starting at:",
                next_date.isoformat()
            )
            # record progress so a later run can resume from here
            self._portal_update_activity_date = next_date.isoformat()
        self._portal_update_completed = True
def add_to_search_index(data_dict_id, in_bulk=False):
    """
    Add (or replace) one dataset in the external Advanced Search Solr
    core.

    :param data_dict_id: id or name of the package to index
    :param in_bulk: when True, skip the per-record delete/commit; the
        caller is expected to manage the index transaction itself

    Errors are logged, never raised.
    """
    log = logging.getLogger('ckan')
    od_search_solr_url = config.get(SEARCH_INTEGRATION_URL_OPTION, "")
    od_search_enabled = config.get(SEARCH_INTEGRATION_ENABLED_OPTION, False)
    od_search_od_url_en = config.get(
        SEARCH_INTEGRATION_OD_URL_EN_OPTION,
        "https://open.canada.ca/data/en/dataset/")
    od_search_od_url_fr = config.get(
        SEARCH_INTEGRATION_OD_URL_FR_OPTION,
        "https://ouvert.canada.ca/data/fr/dataset/")

    # FIX: check the feature flag before hitting the API; previously a
    # package_show round-trip was made even when integration was off
    if not od_search_enabled:
        return

    # Retrieve the full record - it has additional information including
    # organization title and metadata modified date that are not
    # available in the regular data dict
    portal = LocalCKAN()
    data_dict = portal.action.package_show(id=data_dict_id)

    try:
        # value -> label lookup tables, per language
        subject_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_subject')['choices'])
        type_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_resource_related_type')['choices'])
        collection_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_collection')['choices'])
        juristiction_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_jurisdiction')['choices'])
        resource_type_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_resource_type')['choices'])
        frequency_codes = scheming_choices_label_by_value(
            scheming_get_preset('canada_frequency')['choices'])

        # organization title holds 'english | french'
        org_title = data_dict['organization']['title'].split('|')
        owner_org_title_en = org_title[0].strip()
        owner_org_title_fr = org_title[1].strip()

        # fluent fields may arrive either as JSON strings or dicts
        subjects = json.loads(data_dict['subject']) if \
            isinstance(data_dict['subject'], str) else data_dict['subject']
        subjects_en = []
        subjects_fr = []
        for s in subjects:
            # commas are stripped because the values feed a CSV-style facet
            subjects_en.append(subject_codes['en'][s].replace(",", ""))
            subjects_fr.append(subject_codes['fr'][s].replace(",", ""))

        resource_type_en = []
        resource_type_fr = []
        resource_fmt = []
        resource_title_en = []
        resource_title_fr = []
        for r in data_dict['resources']:
            resource_type_en.append(
                resource_type_codes['en'][r['resource_type']]
                if r['resource_type'] in resource_type_codes['en'] else '')
            resource_type_fr.append(
                resource_type_codes['fr'][r['resource_type']]
                if r['resource_type'] in resource_type_codes['fr'] else '')
            resource_fmt.append(r['format'])

            resource_name = json.loads(r['name_translated']) if \
                isinstance(r['name_translated'], str) else r['name_translated']
            # prefer the native language, fall back to the translation
            if 'en' in resource_name:
                resource_title_en.append(resource_name['en'])
            elif 'en-t-fr' in resource_name:
                resource_title_en.append(resource_name['en-t-fr'])
            if 'fr' in resource_name:
                resource_title_fr.append(resource_name['fr'].strip())
            elif 'fr-t-en' in resource_name:
                resource_title_fr.append(resource_name['fr-t-en'].strip())

        display_options = []
        if 'display_flags' in data_dict:
            for d in data_dict['display_flags']:
                display_options.append(d)

        notes_translated = json.loads(data_dict['notes_translated']) if \
            isinstance(data_dict['notes_translated'], str) \
            else data_dict['notes_translated']
        title_translated = json.loads(data_dict['title_translated']) if \
            isinstance(data_dict['title_translated'], str) \
            else data_dict['title_translated']

        od_obj = {
            'portal_type_en_s': type_codes['en'][data_dict['type']],
            'portal_type_fr_s': type_codes['fr'][data_dict['type']],
            'collection_type_en_s':
                collection_codes['en'][data_dict['collection']],
            'collection_type_fr_s':
                collection_codes['fr'][data_dict['collection']],
            'jurisdiction_en_s':
                juristiction_codes['en'][data_dict['jurisdiction']],
            'jurisdiction_fr_s':
                juristiction_codes['fr'][data_dict['jurisdiction']],
            'owner_org_title_en_s': owner_org_title_en,
            'owner_org_title_fr_s': owner_org_title_fr,
            'subject_en_s': subjects_en,
            'subject_fr_s': subjects_fr,
            'resource_type_en_s': list(set(resource_type_en)),
            'resource_type_fr_s': list(set(resource_type_fr)),
            'update_cycle_en_s': frequency_codes['en'][data_dict['frequency']],
            'update_cycle_fr_s': frequency_codes['fr'][data_dict['frequency']],
            'id_name_s': data_dict['name'],
            'id': data_dict['name'],
            'owner_org_s': data_dict['owner_org'],
            'author_txt': data_dict['author'] if 'author' in data_dict else '',
            # FIX: membership checks now use the parsed notes_translated
            # dict; testing `data_dict['notes_translated']` did a
            # substring search when the field was a JSON string
            'description_txt_en':
                notes_translated['en'] if 'en' in notes_translated else '',
            'description_txt_fr':
                notes_translated['fr'] if 'fr' in notes_translated else '',
            'description_xlt_txt_fr':
                notes_translated['fr-t-en']
                if 'fr-t-en' in notes_translated else '',
            'description_xlt_txt_en':
                notes_translated['en-t-fr']
                if 'en-t-fr' in notes_translated else '',
            'title_en_s':
                title_translated['en'] if 'en' in title_translated else '',
            'title_fr_s':
                title_translated['fr'] if 'fr' in title_translated else '',
            'title_xlt_fr_s':
                title_translated['fr-t-en']
                if 'fr-t-en' in title_translated else '',
            'title_xlt_en_s':
                title_translated['en-t-fr']
                if 'en-t-fr' in title_translated else '',
            'resource_format_s': list(set(resource_fmt)),
            'resource_title_en_s': resource_title_en,
            'resource_title_fr_s': resource_title_fr,
            'last_modified_tdt': parser.parse(
                data_dict['metadata_modified']).replace(
                microsecond=0).isoformat() + 'Z',
            'published_tdt': parser.parse(
                data_dict['date_published']).replace(
                microsecond=0).isoformat() + 'Z',
            'ogp_link_en_s': '{0}{1}'.format(
                od_search_od_url_en, data_dict['name']),
            'ogp_link_fr_s': '{0}{1}'.format(
                od_search_od_url_fr, data_dict['name']),
            'display_options_s': display_options,
        }

        if 'en' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(
                notes_translated['en'].strip(), 'en')
        elif 'en-t-fr' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(
                notes_translated['en-t-fr'].strip(), 'en')
        if 'fr' in notes_translated:
            od_obj['desc_summary_txt_fr'] = get_summary(
                notes_translated['fr'].strip(), 'fr')
        # FIX: the fallback checked 'en-t-fr' but read 'fr-t-en',
        # raising KeyError whenever only the translated french text
        # was present
        elif 'fr-t-en' in notes_translated:
            od_obj['desc_summary_txt_fr'] = get_summary(
                notes_translated['fr-t-en'].strip(), 'fr')

        keywords = json.loads(data_dict['keywords']) if \
            isinstance(data_dict['keywords'], str) else data_dict['keywords']
        if 'en' in keywords:
            od_obj['keywords_en_s'] = keywords['en']
        elif 'en-t-fr' in keywords:
            od_obj['keywords_xlt_en_s'] = keywords['en-t-fr']
        if 'fr' in keywords:
            od_obj['keywords_fr_s'] = keywords['fr']
        elif 'fr-t-en' in keywords:
            od_obj['keywords_xlt_fr_s'] = keywords['fr-t-en']

        # NOTE(review): present values use the long '..._identification_*'
        # keys while missing values use the short '..._ident_*' keys —
        # looks inconsistent, but preserved as-is; confirm against the
        # search schema before changing
        if 'data_series_issue_identification' in data_dict:
            if 'en' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_en'] = \
                    data_dict['data_series_issue_identification']['en']
            else:
                od_obj['data_series_issue_ident_en'] = '-'
            if 'fr' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_fr'] = \
                    data_dict['data_series_issue_identification']['fr']
            else:
                od_obj['data_series_issue_ident_fr'] = '-'
        else:
            od_obj['data_series_issue_ident_en'] = '-'
            od_obj['data_series_issue_ident_fr'] = '-'

        solr = pysolr.Solr(od_search_solr_url)
        if in_bulk:
            solr.add([od_obj])
        else:
            # replace any existing document for this dataset
            solr.delete(id=od_obj['id'])
            solr.add([od_obj])
            solr.commit()
    except Exception as x:
        log.error("Exception: {} {}".format(x.message, x.args))
def test_organization_schema_not_found(self):
    """Requesting a schema for an unknown organization type raises NotFound."""
    action = LocalCKAN("visitor").action
    with pytest.raises(NotFound):
        action.scheming_organization_schema_show(type="elmo")
class TestNAVLSchema(FunctionalTestBase):
    """Validation tests for the Canada dataset schema."""

    def setup(self):
        """Create shared users, an organization and package fixtures."""
        self.sysadmin_user = factories.Sysadmin()
        self.normal_user = factories.User()
        self.org = factories.Organization(
            title='en org name | fr org name')
        self.sysadmin_action = LocalCKAN(
            username=self.sysadmin_user['name']).action
        self.normal_action = LocalCKAN(
            username=self.normal_user['name']).action
        self.action = LocalCKAN().action

        self.sysadmin_action.organization_member_create(
            username=self.normal_user['name'],
            id=self.org['name'],
            role='editor')

        # deliberately missing required bilingual/metadata fields
        self.incomplete_pkg = {
            'type': 'dataset',
            'collection': 'primary',
            'title_translated': {'en': u'A Novel By Tolstoy'},
            'license_id': 'ca-ogl-lgo',
            'ready_to_publish': 'true',
            'imso_approval': 'true',
            'jurisdiction': 'federal',
            'maintainer_email': '*****@*****.**',
            'restrictions': 'unrestricted',
            'resources': [{
                'name_translated': {'en': u'Full text.', 'fr': u'Full text.'},
                'format': u'TXT',
                'url': u'http://www.annakarenina.com/download/',
                'size': 42,
                'resource_type': 'dataset',
                'language': ['zxx'],
            }],
        }

        # the incomplete package plus every remaining required field
        self.complete_pkg = dict(self.incomplete_pkg,
            owner_org=self.org['name'],
            title_translated={
                'en': u'A Novel By Tolstoy', 'fr': u'Un novel par Tolstoy'},
            frequency=u'as_needed',
            notes_translated={'en': u'...', 'fr': u'...'},
            subject=[u'persons'],
            date_published=u'2013-01-01',
            keywords={'en': [u'book'], 'fr': [u'livre']},
        )

    def test_basic_package(self):
        """Incomplete packages are rejected; complete packages round-trip."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='basic_package', **self.incomplete_pkg)

        resp = self.normal_action.package_create(
            name='basic_package', **self.complete_pkg)
        assert resp['title_translated']['fr'] == u'Un novel par Tolstoy'

        resp = self.action.package_show(id=resp['id'])
        assert resp['title_translated']['fr'] == u'Un novel par Tolstoy'

    def test_keyword_validation(self):
        """Keywords must be well-formed, long enough and not too long."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en': ['test'], 'fr': ['not ok']}))

        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en': ['test'], 'fr': ['one too short', 'q']}))

        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en': ['this is much too long' * 50],
                          'fr': ['test']}))

        self.normal_action.package_create(
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en': ['these', 'ones', 'are', 'a-ok'],
                          'fr': ['test']}))

    def test_custom_dataset_id(self):
        """A caller-supplied uuid is normalized and must stay unique/valid."""
        my_uuid = '3056920043b943f1a1fb9e7974cbb997'
        norm_uuid = '30569200-43b9-43f1-a1fb-9e7974cbb997'
        self.normal_action.package_create(
            name='custom_dataset_id', id=my_uuid, **self.complete_pkg)

        resp = self.action.package_show(id='custom_dataset_id')
        assert resp['id'] == norm_uuid
        assert resp['name'] == 'custom_dataset_id'

        assert_raises(ValidationError,
            self.sysadmin_action.package_create,
            name='repeated_dataset_id', id=my_uuid, **self.complete_pkg)

        assert_raises(ValidationError,
            self.sysadmin_action.package_create,
            name='invalid_dataset_id', id='my-custom-id', **self.complete_pkg)

    def test_raw_required(self):
        """Removing a required field from the complete package must fail."""
        raw_pkg = dict(self.complete_pkg)
        del raw_pkg['title_translated']

        assert_raises(ValidationError,
            self.normal_action.package_create,
            **raw_pkg)

    def test_tag_extras_bug(self):
        """The subject field must not leak into package extras."""
        resp = self.normal_action.package_create(
            **self.complete_pkg)

        resp = self.action.package_show(id=resp['id'])
        assert 'subject' not in [e['key'] for e in resp.get('extras', [])]

    def test_keywords_with_apostrophe(self):
        """Keywords containing apostrophes are accepted."""
        self.normal_action.package_create(
            **dict(self.complete_pkg, keywords={
                'en': ['test'], 'fr': ["emissions de l'automobile"]}))

    def test_invalid_resource_size(self):
        """Non-numeric resource sizes are rejected."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **dict(self.complete_pkg,
                resources=[
                    dict(self.complete_pkg['resources'][0],
                        size='10M',
                    )],
                )
            )

    def test_copy_org_name(self):
        """The bilingual org title is copied at publication time."""
        pkg = self.normal_action.package_create(**self.complete_pkg)

        assert_equal(sorted(pkg['org_title_at_publication']), ['en', 'fr'])
        assert_equal(pkg['org_title_at_publication']['en'], 'en org name')
        assert_equal(pkg['org_title_at_publication']['fr'], 'fr org name')

    def test_dont_copy_org_name(self):
        """An explicitly supplied org title at publication is preserved."""
        pkg = self.normal_action.package_create(**dict(
            self.complete_pkg, org_title_at_publication={'en': 'a', 'fr': 'b'}))

        assert_equal(pkg['org_title_at_publication']['en'], 'a')
        assert_equal(pkg['org_title_at_publication']['fr'], 'b')

    def test_generated_fields(self):
        """License fields are generated from license_id."""
        pkg = self.normal_action.package_create(**self.complete_pkg)

        # not generated, we set this one but later tests depend on it
        assert_equal(pkg['license_id'], 'ca-ogl-lgo')
        # this one is generated in the bowels of CKAN's model_dictize
        assert_equal(pkg['license_title'],
            'Open Government Licence - Canada')

        raise SkipTest('XXX: not generating fields yet')
        # some we actually generate ourselves
        assert_equal(pkg['license_title_fra'],
            'Licence du gouvernement ouvert - Canada')
        assert pkg['license_url_fra']

        assert pkg['department_number']

    def test_portal_release_date(self):
        """Only privileged users may set portal_release_date."""
        raise SkipTest('XXX: portal_release_date not implemented yet')
        release_pkg = dict(self.complete_pkg,
            portal_release_date='2012-01-01')

        assert_raises(ValidationError,
            self.normal_action.package_create,
            **release_pkg)

        self.publisher_action.package_create(**release_pkg)

        self.sysadmin_action.package_create(**release_pkg)

    def test_spatial(self):
        """Spatial must be a valid GeoJSON polygon."""
        raise SkipTest('XXX: spatial not implemented in raw schema')
        spatial_pkg = dict(self.complete_pkg,
            spatial='{"type": "Polygon", "coordinates": '
                '[[[-141.001333, 41.736231], [-141.001333, 82.514468], '
                '[-52.622540, 82.514468], [-52.622540, 41.736231], '
                '[-141.001333, 41.736231]]]}')
        self.normal_action.package_create(**spatial_pkg)

        bad_spatial_pkg = dict(self.complete_pkg,
            spatial='{"type": "Line", "coordinates": '
                '[[[-141.001333, 41.736231], [-141.001333, 82.514468], '
                '[-52.622540, 82.514468], [-52.622540, 41.736231], '
                '[-141.001333, 41.736231]]]}')
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg)

        bad_spatial_pkg2 = dict(self.complete_pkg, spatial='forty')
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg2)

        bad_spatial_pkg3 = dict(self.complete_pkg,
            spatial='{"type": "Polygon"}')
        # FIX: was self.assert_raises — no such attribute on this class;
        # the module-level assert_raises used everywhere else is correct
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg3)

        bad_spatial_pkg4 = dict(self.complete_pkg,
            spatial='{"type": "Polygon", "coordinates": [1,2,3,4]}')
        # FIX: was self.assert_raises (same as above)
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg4)

    def test_dont_change_portal_release_date(self):
        "normal users should not be able to reset the portal release date"
        raise SkipTest('XXX portal_release_date not yet implemented')
        resp = self.sysadmin_action.package_create(
            portal_release_date='2012-01-01',
            **self.complete_pkg)

        # silently ignore missing portal_release_date
        self.normal_action.package_update(id=resp['id'],
            **self.complete_pkg)

        resp2 = self.normal_action.package_show(id=resp['id'])
        assert_equal(resp['portal_release_date'],
            resp2.get('portal_release_date'))
def create_pd_record(self, owner_org, resource_name):
    """
    Render and process the form used to add a single record to a
    recombinant (PD) datastore resource belonging to *owner_org*.

    GET renders an empty form; POST validates the submitted values,
    inserts the record (dry-run first when there are type errors) and
    either re-renders with errors or redirects to the preview table.
    """
    lc = LocalCKAN(username=c.user)

    try:
        chromo = h.recombinant_get_chromo(resource_name)
        rcomb = lc.action.recombinant_show(
            owner_org=owner_org,
            dataset_type=chromo['dataset_type'])
        # exactly one resource of the dataset carries this name
        [res] = [r for r in rcomb['resources'] if r['name'] == resource_name]

        check_access(
            'datastore_upsert',
            {'user': c.user, 'auth_user_obj': c.userobj},
            {'resource_id': res['id']})
    except NotAuthorized:
        return abort(403, _('Unauthorized'))

    # datastore field id -> list of {'value', 'label'} choices
    choice_fields = {
        f['datastore_id']: [
            {'value': k, 'label': v} for (k, v) in f['choices']]
        for f in h.recombinant_choice_fields(resource_name)}

    pk_fields = aslist(chromo['datastore_primary_key'])

    if request.method == 'POST':
        post_data = parse_params(request.POST, ignore_keys=['save'])

        if 'cancel' in post_data:
            return redirect(h.url_for(
                controller='ckanext.recombinant.controller:UploadController',
                action='preview_table',
                resource_name=resource_name,
                owner_org=rcomb['owner_org'],
            ))

        data, err = clean_check_type_errors(
            post_data,
            chromo['fields'],
            pk_fields,
            choice_fields)
        try:
            # fields that already failed type checks are sent as None;
            # dry_run so nothing is written when errors exist
            lc.action.datastore_upsert(
                resource_id=res['id'],
                method='insert',
                records=[{
                    k: None if k in err else v
                    for (k, v) in data.items()}],
                dry_run=bool(err))
        except ValidationError as ve:
            if 'records' in ve.error_dict:
                # merge datastore field errors with the type errors
                err = dict({
                    k: [_(e) for e in v]
                    for (k, v) in ve.error_dict['records'][0].items()
                }, **err)
            elif ve.error_dict.get('info', {}).get('pgcode', '') == '23505':
                # 23505 = postgres unique_violation on the primary key
                err = dict({
                    k: [_("This record already exists")]
                    for k in pk_fields
                }, **err)

        if err:
            return render('recombinant/create_pd_record.html',
                extra_vars={
                    'data': data,
                    'resource_name': resource_name,
                    'chromo_title': chromo['title'],
                    'choice_fields': choice_fields,
                    'owner_org': rcomb['owner_org'],
                    'errors': err,
                })

        h.flash_notice(_(u'Record Created'))

        return redirect(h.url_for(
            controller='ckanext.recombinant.controller:UploadController',
            action='preview_table',
            resource_name=resource_name,
            owner_org=rcomb['owner_org'],
        ))

    # GET: empty form
    return render('recombinant/create_pd_record.html',
        extra_vars={
            'data': {},
            'resource_name': resource_name,
            'chromo_title': chromo['title'],
            'choice_fields': choice_fields,
            'owner_org': rcomb['owner_org'],
            'errors': {},
        })
def test_dataset_schema_show(self):
    """The third dataset field of the test schema carries the label 'Humps'."""
    action = LocalCKAN('visitor').action
    schema = action.scheming_dataset_schema_show(type='test-schema')
    third_field = schema['dataset_fields'][2]
    assert third_field['label'] == 'Humps'
def test_dataset_schema_list(self):
    """The test schema appears in the list of registered dataset schemas."""
    action = LocalCKAN('visitor').action
    schemas = action.scheming_dataset_schema_list()
    assert 'test-schema' in schemas
def test_dataset_schema_not_found(self):
    """Requesting a schema for an unknown dataset type raises NotFound."""
    action = LocalCKAN('visitor').action
    assert_raises(
        NotFound,
        action.scheming_dataset_schema_show,
        type='ernie')
def test_blank(self):
    """Upserting an empty record must be rejected with ValidationError."""
    ckan = LocalCKAN()
    empty_record = {}
    assert_raises(
        ValidationError,
        ckan.action.datastore_upsert,
        resource_id=self.resource_id,
        records=[empty_record])
def test_example(self):
    """The example record shipped with the grants chromo upserts cleanly."""
    ckan = LocalCKAN()
    example_record = get_chromo('grants')['examples']['record']
    ckan.action.datastore_upsert(
        resource_id=self.resource_id,
        records=[example_record])
def _migrate(args):
    """
    Copy uploaded resource files from local disk storage into cloud
    storage.

    :param args: docopt-style dict; '<path_to_storage>' is the root of
        the local storage tree, '<resource_id>' optionally restricts
        the migration to a single resource.

    Failed uploads are reported and their ids written to a temp file.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is stuctured like so on disk:
    #   - storage/
    #       - ...
    #       - resources/
    #           - <3 letter prefix>
    #               - <3 letter prefix>
    #                   - <remaining resource_id as filename>
    #                       ...
    #                   ...
    #               ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any
        # files. We don't care at all about the overall structure.
        if not files:
            continue

        # reassemble the resource id from the two prefix directories
        # plus the filename
        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue

            resources[ckan_res_id] = os.path.join(
                root,
                file_
            )

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(
            i=i,
            count=len(resources),
            id=resource_id
        ))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue
        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin,
                resource['url'].split('/')[-1]
            )
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e))

    if failed:
        log_file = tempfile.NamedTemporaryFile(delete=False)
        # FIX: writelines() adds no separators, so the ids were written
        # as one unreadable run; write one id per line and close the
        # file so the contents are actually flushed to disk
        log_file.write('\n'.join(failed) + '\n')
        log_file.close()
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name))
def delete_vocabulary(self, name):
    """Remove the named tag vocabulary along with every tag it contains."""
    ckan = LocalCKAN()
    vocabulary = ckan.action.vocabulary_show(id=name)
    for tag in vocabulary['tags']:
        ckan.action.tag_delete(id=tag['id'])
    ckan.action.vocabulary_delete(id=vocabulary['id'])
def datatable(self, resource_name, resource_id):
    """
    Server-side processing endpoint for DataTables views of a
    recombinant resource: returns one page of (optionally filtered and
    sorted) datastore records as JSON, prepending an edit-link column
    when the chromo defines an edit form.
    """
    draw = int(request.params['draw'])
    search_text = unicode(request.params['search[value]'])
    offset = int(request.params['start'])
    limit = int(request.params['length'])

    chromo = h.recombinant_get_chromo(resource_name)
    lc = LocalCKAN(username=c.user)
    try:
        # limit=1: only the unfiltered total is needed here
        unfiltered_response = lc.action.datastore_search(
            resource_id=resource_id,
            limit=1,
        )
    except NotAuthorized:
        # datatables js can't handle any sort of error response
        # return no records instead
        return json.dumps({
            'draw': draw,
            'iTotalRecords': -1,  # with a hint that something is wrong
            'iTotalDisplayRecords': -1,
            'aaData': [],
        })

    cols = [f['datastore_id'] for f in chromo['fields']]
    # an edit form adds one leading (non-data) link column, shifting
    # the column indexes DataTables sends back to us
    prefix_cols = 1 if chromo.get('edit_form', False) else 0

    sort_list = []
    i = 0
    while True:
        if u'order[%d][column]' % i not in request.params:
            break
        sort_by_num = int(request.params[u'order[%d][column]' % i])
        sort_order = (
            u'desc NULLS LAST'
            if request.params[u'order[%d][dir]' % i] == u'desc'
            else u'asc NULLS LAST')
        sort_list.append(
            cols[sort_by_num - prefix_cols] + u' ' + sort_order)
        i += 1

    response = lc.action.datastore_search(
        q=search_text,
        resource_id=resource_id,
        offset=offset,
        limit=limit,
        sort=u', '.join(sort_list),
    )

    aadata = [
        [datatablify(row.get(colname, u''), colname) for colname in cols]
        for row in response['records']]

    if chromo.get('edit_form', False):
        res = lc.action.resource_show(id=resource_id)
        pkg = lc.action.package_show(id=res['package_id'])
        fids = [f['datastore_id'] for f in chromo['fields']]
        pkids = [
            fids.index(k)
            for k in aslist(chromo['datastore_primary_key'])]
        for row in aadata:
            # FIX: aria-label was missing its '=' (aria-label"...) so
            # the attribute was emitted as invalid HTML
            row.insert(
                0,
                (u'<a href="{0}" aria-label="' + _("Edit") + '">'
                 u'<i class="fa fa-lg fa-edit" aria-hidden="true"></i></a>'
                 ).format(
                    h.url_for(
                        controller=
                        'ckanext.canada.controller:PDUpdateController',
                        action='update_pd_record',
                        owner_org=pkg['organization']['name'],
                        resource_name=resource_name,
                        pk=','.join(
                            url_part_escape(row[i]) for i in pkids))))

    return json.dumps({
        'draw': draw,
        'iTotalRecords': unfiltered_response.get('total', 0),
        'iTotalDisplayRecords': response.get('total', 0),
        'aaData': aadata,
    })
def test_group_schema_show(self):
    """The fifth field of the group schema carries the label 'Bookface'."""
    action = LocalCKAN("visitor").action
    schema = action.scheming_group_schema_show(type="group")
    fifth_field = schema["fields"][4]
    assert fifth_field["label"] == "Bookface"
def update_pd_record(self, owner_org, resource_name, pk):
    """
    Render and process the form used to edit one existing record of a
    recombinant (PD) datastore resource.

    :param pk: comma-joined, url-escaped primary key values selecting
        the record to edit; primary key fields themselves cannot be
        changed through this form.
    """
    pk = [url_part_unescape(p) for p in pk.split(',')]

    lc = LocalCKAN(username=c.user)

    try:
        chromo = h.recombinant_get_chromo(resource_name)
        rcomb = lc.action.recombinant_show(
            owner_org=owner_org,
            dataset_type=chromo['dataset_type'])
        # exactly one resource of the dataset carries this name
        [res] = [r for r in rcomb['resources'] if r['name'] == resource_name]

        check_access(
            'datastore_upsert',
            {'user': c.user, 'auth_user_obj': c.userobj},
            {'resource_id': res['id']})
    except NotAuthorized:
        abort(403, _('Unauthorized'))

    # datastore field id -> list of {'value', 'label'} choices
    choice_fields = {
        f['datastore_id']: [
            {'value': k, 'label': v} for (k, v) in f['choices']]
        for f in h.recombinant_choice_fields(resource_name)}

    pk_fields = aslist(chromo['datastore_primary_key'])
    pk_filter = dict(zip(pk_fields, pk))

    records = lc.action.datastore_search(
        resource_id=res['id'],
        filters=pk_filter)['records']
    if len(records) == 0:
        abort(404, _('Not found'))
    if len(records) > 1:
        abort(400, _('Multiple records found'))
    record = records[0]

    if request.method == 'POST':
        post_data = parse_params(
            request.POST, ignore_keys=['save'] + pk_fields)

        if 'cancel' in post_data:
            return redirect(h.url_for(
                controller='ckanext.recombinant.controller:UploadController',
                action='preview_table',
                resource_name=resource_name,
                owner_org=rcomb['owner_org'],
            ))

        data, err = clean_check_type_errors(
            post_data,
            chromo['fields'],
            pk_fields,
            choice_fields)

        # can't change pk fields
        for f_id in data:
            if f_id in pk_fields:
                data[f_id] = record[f_id]
        try:
            # fields that already failed type checks are sent as None;
            # dry_run so nothing is written when errors exist
            lc.action.datastore_upsert(
                resource_id=res['id'],
                #method='update',    FIXME not raising ValidationErrors
                records=[{
                    k: None if k in err else v
                    for (k, v) in data.items()}],
                dry_run=bool(err))
        except ValidationError as ve:
            # merge datastore field errors with the type errors
            err = dict({
                k: [_(e) for e in v]
                for (k, v) in ve.error_dict['records'][0].items()
            }, **err)

        if err:
            return render('recombinant/update_pd_record.html',
                extra_vars={
                    'data': data,
                    'resource_name': resource_name,
                    'chromo_title': chromo['title'],
                    'choice_fields': choice_fields,
                    'pk_fields': pk_fields,
                    'owner_org': rcomb['owner_org'],
                    'errors': err,
                })

        h.flash_notice(_(u'Record %s Updated') % u','.join(pk))

        return redirect(h.url_for(
            controller='ckanext.recombinant.controller:UploadController',
            action='preview_table',
            resource_name=resource_name,
            owner_org=rcomb['owner_org'],
        ))

    # GET: pre-populate the form from the existing record
    data = {}
    for f in chromo['fields']:
        if not f.get('import_template_include', True):
            continue
        val = record[f['datastore_id']]
        data[f['datastore_id']] = val

    return render('recombinant/update_pd_record.html',
        extra_vars={
            'data': data,
            'resource_name': resource_name,
            'chromo_title': chromo['title'],
            'choice_fields': choice_fields,
            'pk_fields': pk_fields,
            'owner_org': rcomb['owner_org'],
            'errors': {},
        })
def test_organization_schema_list(self):
    """Exactly the two expected organization schemas are registered."""
    action = LocalCKAN("visitor").action
    schemas = action.scheming_organization_schema_list()
    assert sorted(schemas) == ["organization", "publisher"]
def copy_datasets(self, remote, package_ids=None):
    """
    A process that accepts package ids on stdin which are passed to
    the package_show API on the remote CKAN instance and compared to
    the local version of the same package.  The local package is then
    created, updated, deleted or left unchanged.  This process outputs
    that action as a string 'created', 'updated', 'deleted' or
    'unchanged'.

    NOTE(review): this chunk ends right after the target package
    fetch; the compare/apply logic appears to continue past the
    visible source.
    """
    # direction is chosen by the options: push to a remote portal,
    # or fetch from a remote registry into the local instance
    if self.options.push_apikey and not self.options.fetch:
        registry = LocalCKAN()
        portal = RemoteCKAN(remote, apikey=self.options.push_apikey)
    elif self.options.fetch:
        registry = RemoteCKAN(remote)
        portal = LocalCKAN()
    else:
        print "exactly one of -f or -a options must be specified"
        return

    now = datetime.now()

    # default to reading ids from stdin until EOF
    if not package_ids:
        package_ids = iter(sys.stdin.readline, '')

    for package_id in package_ids:
        package_id = package_id.strip()
        reason = None
        target_deleted = False
        try:
            source_pkg = registry.action.package_show(id=package_id)
        except NotAuthorized:
            # source package is private: treat as absent
            source_pkg = None
        except (CKANAPIError, urllib2.URLError), e:
            # report the failure on stdout for the parent process,
            # then abort this worker
            sys.stdout.write(json.dumps([package_id,
                'source error',
                unicode(e.args)]) + '\n')
            raise
        if source_pkg and source_pkg['state'] == 'deleted':
            source_pkg = None

        if source_pkg and source_pkg['type'] != 'dataset':
            # non-default dataset types ignored
            source_pkg = None

        _trim_package(source_pkg)

        if source_pkg and not self.options.mirror:
            # treat unpublished packages same as deleted packages
            if not source_pkg['portal_release_date']:
                source_pkg = None
                reason = 'release date not set'
            elif isodate(source_pkg['portal_release_date'], None) > now:
                source_pkg = None
                reason = 'release date in future'

        try:
            target_pkg = portal.call_action('package_show',
                {'id': package_id})
        except (NotFound, NotAuthorized):
            target_pkg = None
        except (CKANAPIError, urllib2.URLError), e:
            sys.stdout.write(json.dumps([package_id,
                'target error',
                unicode(e.args)]) + '\n')
            raise
def copy_datasets(self, remote, package_ids=None):
    """ a process that accepts packages on stdin which are compared
    to the local version of the same package.  The local package is
    then created, updated, deleted or left unchanged.  This process
    outputs that action as a string 'created', 'updated', 'deleted'
    or 'unchanged' """
    portal = LocalCKAN()
    # Capture one reference time so every release-date check in this run
    # is consistent.
    now = datetime.now()
    # Each stdin line is a complete package serialized as JSON (not just
    # an id, unlike the registry-fetching variant of this command).
    packages = iter(sys.stdin.readline, '')
    for package in packages:
        source_pkg = json.loads(package)
        package_id = source_pkg['id']
        reason = None
        target_deleted = False
        # Deleted or non-dataset-typed source packages are treated as
        # absent, which drives the DELETE branch below.
        if source_pkg and source_pkg['state'] == 'deleted':
            source_pkg = None
        if source_pkg and source_pkg['type'] not in DATASET_TYPES:
            # non-default dataset types ignored
            source_pkg = None
        _trim_package(source_pkg)
        action = None
        if source_pkg and not self.options.mirror:
            # Unless mirroring everything, only published, release-dated
            # packages are copied; the rest behave like deletions.
            if source_pkg.get('ready_to_publish') == 'false':
                source_pkg = None
                reason = 'marked not ready to publish'
            elif not source_pkg.get('portal_release_date'):
                source_pkg = None
                reason = 'release date not set'
            elif isodate(source_pkg['portal_release_date'], None) > now:
                source_pkg = None
                reason = 'release date in future'
            else:
                # portal packages published public
                source_pkg['private'] = False
        # NOTE(review): 'action' is only ever None at this point — nothing
        # in the visible code assigns 'skip' before this check, so both
        # this guard and the "action == 'skip'" branch below look like
        # dead scaffolding from an earlier revision; confirm and remove.
        if action != 'skip':
            try:
                target_pkg = portal.call_action('package_show', {
                    'id': package_id
                })
            except (NotFound, NotAuthorized):
                target_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                # Transport/API failure: report on stdout, then abort run.
                sys.stdout.write(
                    json.dumps([
                        package_id,
                        'target error',
                        unicode(e.args)
                    ]) + '\n'
                )
                raise
            if target_pkg and target_pkg['state'] == 'deleted':
                # A soft-deleted target is treated as absent but remembered
                # so it can be undeleted via package_update below.
                target_pkg = None
                target_deleted = True
            _trim_package(target_pkg)
        # Decide and apply the sync action; one JSON status line is
        # emitted per package: [id, action, reason].
        if action == 'skip':
            pass
        elif target_pkg is None and source_pkg is None:
            action = 'unchanged'
            reason = reason or 'deleted on registry'
        elif target_deleted:
            action = 'updated'
            reason = 'undeleting on target'
            portal.action.package_update(**source_pkg)
        elif target_pkg is None:
            action = 'created'
            portal.action.package_create(**source_pkg)
        elif source_pkg is None:
            action = 'deleted'
            portal.action.package_delete(id=package_id)
        elif source_pkg == target_pkg:
            action = 'unchanged'
            reason = 'no difference found'
        else:
            action = 'updated'
            portal.action.package_update(**source_pkg)
        sys.stdout.write(json.dumps([package_id, action, reason]) + '\n')
        # Flush so a supervising process sees progress immediately.
        sys.stdout.flush()
def test_group_schema_not_found(self):
    """Requesting an unknown group schema type raises NotFound."""
    action = LocalCKAN("visitor").action
    with pytest.raises(NotFound):
        action.scheming_group_schema_show(type="bert")
def test_group_schema_show(self):
    """The fifth field of the group schema is labelled 'Bookface'."""
    action = LocalCKAN('visitor').action
    schema = action.scheming_group_schema_show(type='group')
    label = schema['fields'][4]['label']
    assert_equals(label, 'Bookface')
def test_organization_schema_show(self):
    """The fifth field of the organization schema is 'Department ID'."""
    action = LocalCKAN("visitor").action
    schema = action.scheming_organization_schema_show(type="organization")
    label = schema["fields"][4]["label"]
    assert label == "Department ID"
def test_group_schema_not_found(self):
    """An unknown group schema type must raise NotFound."""
    action = LocalCKAN('visitor').action
    assert_raises(
        NotFound,
        action.scheming_group_schema_show,
        type='bert')
def test_group_schema_list(self):
    """Both group schema types should be listed."""
    action = LocalCKAN("visitor").action
    listed = action.scheming_group_schema_list()
    assert sorted(listed) == ["group", "theme"]
def test_organization_schema_list(self):
    """Only the default organization schema should be listed."""
    action = LocalCKAN('visitor').action
    listed = action.scheming_organization_schema_list()
    assert_equals(listed, ['organization'])
from datetime import datetime
from ckan.lib import mailer
import ckan.plugins.toolkit as t
from ckanapi import LocalCKAN, ValidationError

# Module-level client for the local CKAN site (no user context).
registry = LocalCKAN()

def upload_to_ckan(package_id, filename):
    # Upload the file at *filename* as a new resource on dataset
    # *package_id*, then compose a notification message about it.
    try:
        # NOTE(review): the file handle from open() is never explicitly
        # closed in the visible code — confirm whether a context manager
        # is used in the full file.
        resource = registry.action.resource_create(package_id=package_id,
                                                   upload=open(filename, 'rb'))
        # Recipients come from config; empty string yields an empty list.
        email_notification_recipients = t.aslist(
            t.config.get('ckanext.prh_tools.mail_recipients', ''))
        site_title = t.config.get('ckan.site_title', '')
        today = datetime.now().date().isoformat()
        msg = '%(site_title)s - PRH data uploaded %(today)s\n\n%(status)s' % {
            'site_title': site_title,
            'today': today,
            'status': "New data available in https://www.avoindata.fi/data/dataset/%s/resource/%s" % (package_id, resource.get('id'))
        }
        # NOTE(review): this try block has no except/finally in the visible
        # chunk and the mail-sending step implied by the imports is not
        # shown — the function body appears truncated here.
def test_organization_schema_show(self): lc = LocalCKAN('visitor') schema = lc.action.scheming_organization_schema_show( type='organization') assert_equals(schema['fields'][4]['label'], 'Department ID')
def test_organization_schema_not_found(self): lc = LocalCKAN('visitor') assert_raises(NotFound, lc.action.scheming_organization_schema_show, type='elmo')
def load_suggested(self, use_created_date, filename):
    """
    a process that loads suggested datasets from Drupal into CKAN

    :param use_created_date: when truthy, each row's own date_created is
        used as the forwarded date instead of today's date
    :param filename: path to the UTF-8 (BOM tolerated) CSV export
    """
    registry = LocalCKAN()

    # load packages as dict
    # Page through all existing 'prop' (suggestion) packages so rows that
    # were already imported can be skipped below.
    results = True
    counter = 0
    batch_size = 100
    existing_suggestions = {}
    while results:
        packages = registry.action.package_search(q='type:prop', start=counter, rows=batch_size, include_private=True)['results']
        if packages:
            for package in packages:
                existing_suggestions[package['id']] = package
            counter += len(packages)
        else:
            results = False

    # load data from csv
    # utf-8-sig strips a leading BOM; rows are re-encoded to utf-8 bytes
    # because Python 2's csv module cannot read unicode directly.
    csv_file = io.open(filename, "r", encoding='utf-8-sig')
    csv_reader = csv.DictReader((l.encode('utf-8') for l in csv_file))
    today = datetime.now().strftime('%Y-%m-%d')
    for row in csv_reader:
        uuid = row['uuid']
        # Skip rows already present as suggestion packages.
        if uuid in existing_suggestions:
            continue
        if use_created_date:
            # NOTE(review): this rebinds the loop-shared 'today' rather
            # than a per-row variable, so it sticks for later rows too —
            # harmless only while use_created_date stays constant.
            today = row['date_created']

        # add record
        # Build the suggestion package; empty subject/keyword cells fall
        # back to fixed defaults.
        record = {
            "type": "prop",
            "state": "active",
            "id": uuid,
            "title_translated": {
                "en": row['title_en'],
                "fr": row['title_fr']
            },
            "owner_org": row['organization'],
            "notes_translated": {
                "en": row['description_en'],
                "fr": row['description_fr'],
            },
            "comments": {
                "en": row['additional_comments_and_feedback_en'],
                "fr": row['additional_comments_and_feedback_fr']
            },
            "reason": row['reason'],
            "subject": row['subject'].split(',') if row['subject'] else ['information_and_communications'],
            "keywords": {
                "en": row['keywords_en'].split(',') if row['keywords_en'] else ['dataset'],
                "fr": row['keywords_fr'].split(',') if row['keywords_fr'] else ['Jeu de données'],
            },
            "date_submitted": row['date_created'],
            "date_forwarded": today,
            # 'department_contacted' rows get an empty status history;
            # anything else becomes a single imported status entry.
            "status": [] if row['dataset_suggestion_status'] == 'department_contacted' else [
                {
                    "reason": row['dataset_suggestion_status'],
                    "date": row['dataset_released_date'] if row['dataset_released_date'] else today,
                    "comments": {
                        "en": row['dataset_suggestion_status_link'] or u'Status imported from previous ‘suggest a dataset’ system',
                        "fr": row['dataset_suggestion_status_link'] or u'État importé du système précédent « Proposez un jeu de données »',
                    }
                }
            ]
        }
        try:
            registry.action.package_create(**record)
            print uuid + ' suggested dataset created'
        except ValidationError as e:
            # An 'id' validation error means the id already exists
            # (e.g. as a deleted package); retry as an update.
            if 'id' in e.error_dict:
                try:
                    registry.action.package_update(**record)
                    print uuid + ' suggested dataset update deleted'
                except ValidationError as e:
                    print uuid + ' (update deleted) ' + str(e)
            else:
                print uuid + ' ' + str(e)
    csv_file.close()
def test_group_schema_list(self):
    """Only the default group schema should be listed."""
    action = LocalCKAN('visitor').action
    listed = action.scheming_group_schema_list()
    assert_equals(listed, ['group'])