def test_powerview_add_resource_valid(self): '''Adding a resource to powerview changes the resource list returned for the powerview.''' sysadmin = Sysadmin() r1 = Resource() create_dict = factories.PowerView() nosetools.assert_equal(create_dict['resources'], []) nosetools.assert_equal(PowerviewResourceAssociation.count(), 0) toolkit.get_action('powerview_add_resource')( context={'user': sysadmin['name']}, data_dict={ 'id': create_dict['id'], 'resource_id': r1['id'] } ) updated_dict = toolkit.get_action('powerview_show')( context={'user': sysadmin['name']}, data_dict={'id': create_dict['id']} ) nosetools.assert_equal(PowerviewResourceAssociation.count(), 1) nosetools.assert_equal(updated_dict['resources'], [r1['id']])
def test_powerview_add_resource_multiple_add(self): '''Attempt to add resource multiple times to same powerview raises error.''' sysadmin = Sysadmin() r1 = Resource() create_dict = factories.PowerView() nosetools.assert_equal(create_dict['resources'], []) toolkit.get_action('powerview_add_resource')( context={'user': sysadmin['name']}, data_dict={ 'id': create_dict['id'], 'resource_id': r1['id'] } ) nosetools.assert_equal(PowerviewResourceAssociation.count(), 1) # try to add resources to same powerview again... with nosetools.assert_raises(ValidationError): toolkit.get_action('powerview_add_resource')( context={'user': sysadmin['name']}, data_dict={ 'id': create_dict['id'], 'resource_id': r1['id'] } )
def m19115store_create(context, data_dict): '''Adds a new 19115store. **Params:** :name [String]: 19115store name. :api_url [String]: api url. :package_id [String] Existing package id. **Results:** :returns: The newly created data object. :rtype: dictionary ''' if not 'name' in data_dict: raise p.toolkit.ValidationError({'name': ['name required']}) if not 'api_url' in data_dict: raise p.toolkit.ValidationError({'api_url': ['api_url required']}) package_create_response = toolkit.get_action('package_create')(context, data_dict) log.info(package_create_response) if package_create_response['id'] is None: raise p.toolkit.ValidationError({'package_id': ['package_id is none']}) package_id = package_create_response['id'] resource_name = data_dict.get('name').lower() resource_url = data_dict.get('api_url') resource_webstore_url = data_dict.get('api_url') resource_dict = {'package_id': package_id,'name':resource_name,'url':resource_url,'webstore_url':resource_webstore_url,'resource_type':'19115store','format':'JSON'} resource_create_response = toolkit.get_action('resource_create')(context, resource_dict) log.info(resource_create_response) return {'package_id': package_create_response['id']}
def test_powerview_add_resource_add_resource_to_existing_list(self): '''Adding a resource to powerview maintains existing resources.''' sysadmin = Sysadmin() r1 = Resource() r2 = Resource() r3 = Resource() create_dict = factories.PowerView(resources=[r1['id'], r2['id']]) nosetools.assert_equal(set(create_dict['resources']), set([r1['id'], r2['id']])) nosetools.assert_equal(PowerviewResourceAssociation.count(), 2) toolkit.get_action('powerview_add_resource')( context={'user': sysadmin['name']}, data_dict={ 'id': create_dict['id'], 'resource_id': r3['id'] } ) updated_dict = toolkit.get_action('powerview_show')( context={'user': sysadmin['name']}, data_dict={'id': create_dict['id']} ) nosetools.assert_equal(PowerviewResourceAssociation.count(), 3) nosetools.assert_equal(set(updated_dict['resources']), set([r1['id'], r2['id'], r3['id']]))
def test_harvest_db_logger(self): # Create source and check if harvest_log table is populated data_dict = SOURCE_DICT.copy() data_dict['source_type'] = 'test' source = factories.HarvestSourceObj(**data_dict) content = 'Harvest source created: %s' % source.id log = harvest_model.Session.query(harvest_model.HarvestLog).\ filter(harvest_model.HarvestLog.content==content).first() self.assertIsNotNone(log) self.assertEqual(log.level, 'INFO') context = { 'model': model, 'session': model.Session, 'ignore_auth': True, } data = toolkit.get_action('harvest_log_list')(context, {}) self.assertTrue(len(data) > 0) self.assertIn('level', data[0]) self.assertIn('content', data[0]) self.assertIn('created', data[0]) self.assertTrue(data[0]['created'] > data[1]['created']) per_page = 1 data = toolkit.get_action('harvest_log_list')(context, {'level': 'info', 'per_page': per_page}) self.assertEqual(len(data), per_page) self.assertEqual(data[0]['level'], 'INFO')
def purge_all(self): response = self.ask( 'You are about to remove all datasets and datastore tables. Are you sure you want to continue?' ) # If user confirms the action, we're going to rename the tables in a single transaction if response: pkgs = toolkit.get_action('current_package_list_with_resources')(self.context, {}) for pkg_dict in pkgs: for resource in pkg_dict['resources']: try: toolkit.get_action('datastore_delete')(self.context, {'resource_id': resource['id'], 'force': True}) except logic.NotFound: # Ignore missing datastore tables # Load the package model and delete pkg = model.Package.get(pkg_dict['id']) if pkg: rev = model.repo.new_revision() pkg.purge() model.repo.commit_and_remove() print '%s purged' % pkg_dict['name']
def delete(self, dataset_id, issue_number): dataset = self._before_dataset(dataset_id) if 'cancel' in request.params: h.redirect_to('issues_show', dataset_id=dataset_id, issue_number=issue_number) if request.method == 'POST': try: toolkit.get_action('issue_delete')( data_dict={'issue_number': issue_number, 'dataset_id': dataset_id} ) except toolkit.NotAuthorized: msg = _('Unauthorized to delete issue {0}'.format( issue_number)) toolkit.abort(401, msg) h.flash_notice( _('Issue {0} has been deleted.'.format(issue_number)) ) h.redirect_to('issues_dataset', dataset_id=dataset_id) else: return render('issues/confirm_delete.html', extra_vars={ 'issue_number': issue_number, 'pkg': dataset, })
def before_view(self, pkg_dict): if not self.is_supported_package_type(pkg_dict): return pkg_dict # create resource views if necessary user = tk.get_action('get_site_user')({'ignore_auth': True}, {}) context = { 'model': model, 'session': model.Session, 'user': user['name'] } tk.check_access('package_create_default_resource_views', context) # get the dataset via API, as the pkg_dict does not contain all fields dataset = tk.get_action('package_show')( context, {'id': pkg_dict['id']} ) # Make sure resource views are created before showing a dataset tk.get_action('package_create_default_resource_views')( context, {'package': dataset} ) return pkg_dict
def command(self): """ """ self._load_config() log = logging.getLogger(__name__) import ckan.model as model log.info('ADDING GROUPS (Eurovoc Domains)') root = ET.parse('ckanext/eurovoc/eurovoc_xml/dom_en.xml').getroot() for record in root.iter('RECORD'): id = record.find('DOMAINE_ID').text title = record.find('LIBELLE').text.title() name = slugify(title).lower() desc = 'Eurovoc Domain: ' + id + ' ' + title grp_dict = {'id': id, 'title': title, 'name': name, 'type': 'group', 'extras': [{'key': 'Eurovoc Domain', 'value': title}, {'key': 'Eurovoc Domain ID', 'value': id}]} log.info('Creating group: ' + id + ' - ' + title) context = {'user': '******', 'model': model, 'session': model.Session} try: toolkit.get_action('group_create')(context, grp_dict) except: pass log.info('ADDING VOCABULARY THESAURUS') context = {'user': '******', 'model': model, 'session': model.Session} voc_dict = {'name': 'eurovoc_thesaurus'} try: voc = toolkit.get_action('vocabulary_create')(context, voc_dict) except ValidationError, e: voc = toolkit.get_action('vocabulary_show')(context, {'id': 'eurovoc_thesaurus'})
def get_package_dict(datasetID): user = tk.get_action('get_site_user')({}, {}) context = {'user': user['name']} try: return tk.get_action('package_show')(context, {'id': datasetID}) except: return {}
def create_orgs(organization_id, site_user): api_url = config.get('ckanext.glasgow.metadata_api', '').rstrip('/') api_endpoint = '{}/Metadata/Organisation/{}'.format(api_url, organization_id) request = requests.get(api_endpoint, verify=False) try: result = _fetch_from_ec(request) org = result['MetadataResultSet'][0] except (KeyError, IndexError): print 'failed to fetch org {} from EC. Response {}'.format(organization_id, str(result)) return context = { 'model': model, 'session': model.Session, 'user': site_user, 'local_action': True, } org_name = get_org_name(org, 'Title') data_dict = { 'id': org['Id'], 'title': org['Title'], 'name': org_name, } try: toolkit.get_action('organization_create')(context, data_dict) context.pop('local_action', None) return toolkit.get_action('organization_show')(context, {id: 'organization_id'}) except toolkit.ValidationError: print 'failed to create org {}'.format(organization_id)
def _search_issues(dataset_id, status=issuemodel.ISSUE_STATUS.open, sort='newest', spam_state=None, q='', page=1, per_page=get_issues_per_page()[0]): # use the function params to set default for our arguments to our # data_dict if needed params = locals().copy() # convert per_page, page parameters to api limit/offset limit = per_page offset = (page - 1) * limit params.pop('page', None) params.pop('per_page', None) params['offset'] = offset issues = toolkit.get_action('issue_search')(data_dict=params) issue_count = toolkit.get_action('issue_count')(data_dict=params) pagination = Pagination(page, limit, issue_count) template_variables = { 'issues': issues, 'status': status, 'sort': sort, 'q': q, 'pagination': pagination, } if spam_state: template_variables['spam_state'] = spam_state return template_variables
def command(cls, config_ini, org_names): common.load_config(config_ini) common.register_translator() from ckan.plugins import toolkit from ckan import model orgs = [toolkit.get_action('organization_show')( data_dict={'id': org_name}) for org_name in org_names] source_org, dest_org = orgs assert source_org assert dest_org search_results = toolkit.get_action('package_search')( data_dict=dict(fq='publisher:%s' % source_org['name'], rows=1000)) print 'Datasets: %s' % search_results['count'] stats = Stats() if len(search_results['results']) != search_results['count']: assert 0, 'need to implement paging' #context = { # 'user': get_script_user(__name__)['name'], # 'ignore_auth': True, # 'model': model} rev = model.repo.new_revision() rev.author = 'script-%s.py' % __file__ for dataset in search_results['results']: model.Package.get(dataset['id']).owner_org = dest_org['id'] #dataset_ = toolkit.get_action('package_patch')( # context=context, # data_dict=dict(id=dataset['id'], owner_org=dest_org['id'])) print stats.add('Changed owner_org', dataset['name']) print stats.report() print 'Writing' model.Session.commit()
def ccca_get_orgs (): """ Delivers an user-dependent list of organizations and users""" try: all_users = tk.get_action('user_list')({},{}) except: return None # make the return dict user_orgs = {} for user in all_users: orgs_for_user = [] try: u_orgs = tk.get_action('organization_list_for_other_user')({},{'user_id':user['id']}) except: continue for u_org in u_orgs: org = u_org['organization'] org_sum = {} org_sum['name'] = org['name'] org_sum['display_name'] = org['display_name'] org_sum['url'] = h.url_for(controller='organization', action='read', id=org['name']) orgs_for_user.append(org_sum) user_orgs[user['name']] = orgs_for_user return user_orgs
def _asset_from_user(self, data, additional): # check for image for field in ('url',): if not field in data: raise AssetAbsentFieldsException(field) # create resource if not exists if not additional['resources']: new_id = _make_uuid() parent = toolkit.get_action('resource_create')(additional['context'], { 'package_id':additional['package_id'], 'id':new_id, 'url': _site_url() + '/datastore/dump/' + new_id, 'name':'Assets', 'resource_type':'asset', }) # get resource if exists else: parent = toolkit.get_action('resource_show')(additional['context'], {'id': additional['resources'][0].id}) parent_id = parent['id'] # create datastore if not exists if not parent.get('datastore_active'): _default_datastore_create(additional['context'], parent_id) additional['parent_id'] = parent_id # add asset to datastore return self._add_new_asset(data, additional)
def review(self, id): """ sends review notification to all journal admins """ context = self._context() try: tk.check_access('package_update', context, {'id': id}) except tk.NotAuthorized: tk.abort(403, 'Unauthorized') c.pkg_dict = tk.get_action('package_show')(context, {'id': id}) # avoid multiple notifications (eg. when someone calls review directly) if c.pkg_dict.get('dara_edawax_review', 'false') == 'true': h.flash_error("Package has already been sent to review") redirect(id) user_name = tk.c.userobj.fullname or tk.c.userobj.email admins = get_group_or_org_admin_ids(c.pkg_dict['owner_org']) addresses = map(lambda admin_id: model.User.get(admin_id).email, admins) note = n.review(addresses, user_name, id) if note: c.pkg_dict['dara_edawax_review'] = 'true' tk.get_action('package_update')(context, c.pkg_dict) h.flash_success('Notification to Editors sent.') else: h.flash_error('ERROR: Mail could not be sent. Please try again later or contact the site admin.') redirect(id)
def download_and_extract(*args, **kwargs): # Simulate a change to the package by another party during # the download and extraction process. toolkit.get_action('package_patch')({'user': sysadmin['name']}, {'id': res_dict['package_id'], 'title': 'A changed title'}) return {'fulltext': 'foobar'}
def test_update_view_action_success(self, flash_mock): """Test the create view action directly (successfull test)""" resource_view_create = toolkit.get_action('resource_view_create') resource_view_update = toolkit.get_action('resource_view_update') # First create a resource data_dict = dict(self.base_data_dict.items() + {'title': 'test4'}.items()) resource_view = resource_view_create(TestViewCreated.context, data_dict) # Now try to update it! data_dict['id'] = resource_view['id'] data_dict['longitude_field'] = 'long2' resource_view_update(TestViewCreated.context, data_dict) # Check we have lat/long values. This is done more extensively in test_actions. metadata = MetaData() table = Table(self.resource['resource_id'], metadata, autoload=True, autoload_with=TestViewCreated.engine) s = select([ table.c['latitude'], table.c['long2'], func.st_x(table.c['_geom']).label('x'), func.st_y(table.c['_geom']).label('y'), ]).where(table.c['_the_geom_webmercator'] != None) r = TestViewCreated.engine.execute(s) try: assert_equal(r.rowcount, 2) for row in r: assert_equal(float(row['x']), float(row['long2'])) assert_equal(float(row['y']), float(row['latitude'])) except: raise finally: r.close() # Check we have a message to inform us all went well assert_true(flash_mock.called) assert_equal(flash_mock.call_args[1]['category'], 'alert-success')
def resource_schema_fkey_delete(context, data_dict): '''Delete a resource's schema's foreign key. :param resource_id: the ID of the resource :type resource_id: string :param fkey_uid: the fkey_uid of the foreign key to delete :type fields: string ''' try: data_dict, errors = dictization_functions.validate(data_dict, schema.resource_schema_fkey_delete_schema(), context) except exceptions.InvalidResourceIDException: raise toolkit.ValidationError(toolkit._("Invalid resource_id")) if errors: raise toolkit.ValidationError(errors) resource_id = data_dict['resource_id'] schema_ = toolkit.get_action('resource_schema_show')(context, {'resource_id': resource_id}) current = schema_.get('foreignKeys', []) fkeys = [i for i in current if i['fkey_uid'] != data_dict['fkey_uid']] schema_['foreignKeys'] = fkeys schema_ = json.dumps(schema_) resource_dict = toolkit.get_action('resource_show')(context, {'id': resource_id}) toolkit.get_action('resource_update')(context, {'id': resource_id, 'url': resource_dict['url'], 'name': resource_dict['name'], 'schema': schema_})
def update_package_published_status(package_id, status): ''' Updates ths published status for a given package_id status: True -> set published status to true False -> set published status to false ''' pkg = toolkit.get_action('package_show')(None, {'id': package_id}) extras = pkg.get('extras', []) for extra in extras: key = extra.get('key', None) if key == 'published': extras.remove(extra) tags = pkg.get('tags') if status: tags.append({'name':'published'}) new_dict = {u'key': u'published', u'value': u'true'} else: for tag in tags: if tag['name'] == "published": tags.remove(tag) new_dict = {u'key': u'published', u'value': u'false'} extras.insert(0,new_dict) toolkit.get_action('package_patch')(None, {'id': package_id, 'extras':extras, 'tags': tags}) return True
def dataset_version_create(context, data_dict): id = data_dict.get('id') parent_name = data_dict.get('base_name') owner_org = data_dict.get('owner_org') parent_dict = { 'name': parent_name, } if owner_org: parent_dict['owner_org'] = owner_org parent_dict['private'] = True else: parent_dict['private'] = False parent = _get_or_create_parent_dataset( context, parent_dict ) toolkit.get_action('package_relationship_create')( _get_context(context), { 'subject': id, 'object': parent['id'], 'type': 'child_of', } )
def vocabulary(self, name): """Retrieve or create a vocabulary""" context = {"user": self.admin_user().name} try: return toolkit.get_action("vocabulary_show")(context, {"id": name}) except: return toolkit.get_action("vocabulary_create")(context, {"name": name, "tags": []})
def _prepare_celery(context, data_dict, task_type): task_id = str(uuid.uuid4()) user = context['auth_user_obj'] if not user.sysadmin: raise toolkit.NotAuthorized userapikey = user.apikey celery_context = json.dumps({ 'site_url': config.get('ckan.site_url'), 'apikey': userapikey, }) task_status = { 'entity_id': data_dict.get('resource', "without resource"), 'entity_type': u'resource', 'task_type': task_type, 'key': u'celery_task_id', 'value': data_dict.get('word', ''), 'state':'Preparing', 'error': u'task_id:%s' % task_id, 'last_updated': datetime.now().isoformat() } toolkit.get_action('task_status_update')(context, task_status) return task_id, celery_context
def resource_schema_pkey_delete(context, data_dict): '''Delete a resource's schema's primary key. :param resource_id: the ID of the resource :type resource_id: string ''' try: data_dict, errors = dictization_functions.validate(data_dict, schema.resource_schema_pkey_delete_schema(), context) except exceptions.InvalidResourceIDException: raise toolkit.ValidationError(toolkit._("Invalid resource_id")) assert not errors # Nothing in resoource_schema_pkey_delete_schema ever # adds anything to the errors dict. resource_id = data_dict.pop('resource_id') schema_ = toolkit.get_action('resource_schema_show')(context, {'resource_id': resource_id}) if 'primaryKey' in schema_: del schema_['primaryKey'] schema_ = json.dumps(schema_) resource_dict = toolkit.get_action('resource_show')(context, {'id': resource_id}) toolkit.get_action('resource_update')(context, {'id': resource_id, 'url': resource_dict['url'], 'name': resource_dict['name'], 'schema': schema_})
def resource_datapreview(self, resource_id, id): ''' Embeded page for a resource data-preview. Depending on the type, different previews are loaded. This could be an img tag where the image is loaded directly or an iframe that embeds a webpage, recline or a pdf preview. ''' context = { 'model': model, 'session': model.Session, 'user': c.user or c.author, 'auth_user_obj': c.userobj } try: c.resource = toolkit.get_action('resource_show')(context, {'id': resource_id}) c.package = toolkit.get_action('package_show')(context, {'id': id}) data_dict = {'resource': c.resource, 'package': c.package} c.resource_json = json.dumps(c.resource) except NotFound: abort(404, _('Resource not found')) except NotAuthorized: abort(401, _('Unauthorized to read resource %s') % id) else: return render('recline_interlink.html')
def manage_assets(self, id, resource_id): # inits context self._init_context() if not c.userobj or not c.userobj.sysadmin: base.abort(404) try: toolkit.c.pkg_dict = toolkit.get_action('package_show')(None, {'id': id}) toolkit.c.resource = toolkit.get_action('resource_show')(None, {'id': resource_id}) except toolkit.ObjectNotFound: base.abort(404, _('Resource not found')) except toolkit.NotAuthorized: base.abort(401, _('Unauthorized to edit this resource')) page = int(request.params.get('page',1)) assets = [] try: result = toolkit.get_action('datastore_search')(self.context,{ 'id':resource_id, 'limit':ASSETS_PER_PAGE, 'offset':(page-1)*ASSETS_PER_PAGE, 'sort':'_id asc' }) assets.extend(result['records']) except toolkit.ObjectNotFound: return base.render('package/manage_assets.html') hidden_assets = [] hidden = DFMPSearchQuery.run({ 'q':'id:{res_id}'.format(res_id=resource_id), 'rows':100, 'start':0, 'fq':'+state:hidden', })['results'] if hidden: for item in hidden: hidden_assets.append(json.loads(item['data_dict'])) extra_vars = { 'assets':assets, 'hidden_assets':hidden_assets, 'action_url':h.url_for('ajax_actions'), } def pager_url(q=None, page=None): params = [ ('page', page), ] url = h.url_for('manage_assets', id=id, resource_id=resource_id) return url_with_params(url, params) c.page = h.Page( collection=assets, page=page, url=pager_url,#pager_url, item_count=result.get('total',0), items_per_page=ASSETS_PER_PAGE, ) return base.render('package/manage_assets.html', extra_vars=extra_vars)
def _listener_route(self, action, id, resource_id): if not c.userobj or not c.userobj.sysadmin: base.abort(404) if action == 'terminate': task = session.query(model.TaskStatus)\ .filter( model.TaskStatus.task_type=='twitter_streaming', model.TaskStatus.entity_id==resource_id)\ .first() if not task: h.flash_error("Can't find listener") if task: pid = task.error or '' if not pid: h.flash_error("Can't get PID of process") else: h.flash_success('Success') toolkit.get_action('task_status_update')(None, { 'entity_id': resource_id, 'task_type': 'twitter_streaming', 'key': 'celery_task_id', 'state': 'Terminated', 'value': 'Ready for start', 'error': pid, 'last_updated': datetime.datetime.now().isoformat(), 'entity_type': 'resource' }) if os.system('kill -9 %s' % pid): toolkit.get_action('celery_revoke')(self.context, {'id': pid, 'resource': resource_id}) base.redirect(h.url_for('getting_tweets', id=id, resource_id=resource_id))
def resource_schema_pkey_create(context, data_dict): '''Add a primary key to a resource's schema. :param resource_id: the ID of the resource :type resource_id: string :param pkey: the primary key, either the name of one of the fields or a list of field names from the resource's schema :type pkey: string or iterable of strings :returns: the primary key that was created :rtype: string or list of strings ''' # Fail if the resource already has a primary key. resource_id = toolkit.get_or_bust(data_dict, 'resource_id') try: pkey = toolkit.get_action('resource_schema_pkey_show')(context, {'resource_id': resource_id}) except exceptions.InvalidResourceIDException: raise toolkit.ValidationError(toolkit._("Invalid resource_id")) if pkey is not None: raise toolkit.ValidationError(toolkit._("The resource already has a " "primary key")) # Otherwise create is the same as update. return toolkit.get_action('resource_schema_pkey_update')(context, data_dict)
def download_tabular_data_format(self, package_id): '''Return the given package as a Tabular Data Format ZIP file. ''' context = { 'model': model, 'session': model.Session, 'user': toolkit.c.user or toolkit.c.author, } r = toolkit.response r.content_disposition = 'attachment; filename={0}.zip'.format( package_id) r.content_type = 'application/octet-stream' # Make a zipstream and put it in the context. This means the # package_to_tabular_data_format action will add files into # the zipstream for us. pkg_zipstream = zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) context['pkg_zipstream'] = pkg_zipstream toolkit.get_action('package_to_tabular_data_format')(context, {'id': package_id}) return pkg_zipstream
def reauthor(dataset, author_mail, msg, context): """ notify author that dataset should be revised """ body = u""" Dear Author, the Editors of '{journal}' have requested that you revise the replication files named '{title}' which you submitted to the ZBW--Journal Data Archive. URL: {url} {message} """ def create_message(): if msg: return u"Message: \n========\n\n{}".format(msg) return u"" pkg = tk.get_action('package_show')(context, {'id': dataset}) org_id = pkg.get('owner_org', pkg.get('group_id', False)) org = tk.get_action('organization_show')(context, {'id': org_id}) d = {'journal': org['title'], 'url': package_url(dataset), 'title': pkg.get('name'), 'message': create_message()} body = body.format(**d) message = MIMEText(body.encode('utf-8'), 'plain', 'utf-8') message['Subject'] = Header(u"ZBW Journal Data Archive: Please revise your uploaded dataset") message['From'] = config.get('smtp.mail_from') message['To'] = Header(author_mail, 'utf-8') message['Date'] = Utils.formatdate(time()) message['X-Mailer'] = "CKAN {} [Plugin edawax]".format(ckan_version) return sendmail(author_mail, message) # boolean
class LanddbcustomizePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm, DefaultTranslation): plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IDatasetForm) plugins.implements(plugins.ITemplateHelpers) plugins.implements(plugins.IFacets) plugins.implements(plugins.IPackageController, inherit=True) plugins.implements(plugins.ITranslation) if update_vocab: #translation of terms (these cannot use ITranslate) try: user = toolkit.get_action('get_site_user')({ 'ignore_auth': True }, {}) context = {'user': user['name']} term_translation_update = toolkit.get_action( 'term_translation_update') for tagList in [ regions_tags, datasources_tags, updatefreqs_tags, custom_tags ]: for tag in tagList: data = { 'term': tag[0], 'term_translation': tag[0], 'lang_code': "zh_Hant_TW", } term_translation_update(context, data) data = { 'term': tag[0], 'term_translation': tag[1], 'lang_code': "zh_Hans_CN", } term_translation_update(context, data) data = { 'term': tag[0], 'term_translation': tag[2], 'lang_code': "en", } term_translation_update(context, data) except: print("Vocab not updated") # create_vocab('regions', [ t[0] for t in regions_tags] ) # IConfigurer def update_config(self, config_): toolkit.add_template_directory(config_, 'templates') toolkit.add_public_directory(config_, 'public') #toolkit.add_resource('fanstatic', 'landdbcustomize') toolkit.add_resource('assets', 'landdbcustomize') # IFacet def dataset_facets(self, facets_dict, package_type): new_facets_dict = extend_facets_dict(facets_dict) return new_facets_dict def group_facets(self, facets_dict, group_type, package_type): # somehow have to edit the facets_dict inplace instead of making a new one # https://github.com/ckan/ckan/issues/2713 n = len(facets_dict) items = [] for i in range(n): items.append(facets_dict.popitem(last=False)) facets_dict['vocab_regions'] = toolkit._(u'Region') facets_dict['vocab_datasources'] = toolkit._(u'Data source') facets_dict['vocab_updatefreqs'] = toolkit._(u'Update frequency') for k, v in items: # print("facet dict",k,v) if k == 'groups': facets_dict[k] = toolkit._(u'Topics') continue facets_dict[k] = v return facets_dict def organization_facets(self, facets_dict, organization_type, package_type): # somehow have to edit the facets_dict inplace instead of making a new one # https://github.com/ckan/ckan/issues/2713 n = len(facets_dict) items = [] for i in range(n): items.append(facets_dict.popitem(last=False)) facets_dict['vocab_regions'] = toolkit._(u'Region') facets_dict['vocab_datasources'] = toolkit._(u'Data source') facets_dict['vocab_updatefreqs'] = toolkit._(u'Update frequency') for k, v in items: # print("facet dict",k,v) if k == 'groups': facets_dict[k] = toolkit._(u'Topics') continue facets_dict[k] = v return facets_dict # IPackageController def before_search(self, search_params): extras = search_params.get('extras') if not extras: # There are no extras in the search params, so do nothing. return search_params start_date = extras.get('ext_startdate') print("sd", start_date) end_date = extras.get('ext_enddate') print("ed", end_date) if not start_date and not end_date: # The user didn't select either a start and/or end date, so do nothing. return search_params if not start_date: start_date = '*' if not end_date: end_date = '*' # Add a date-range query with the selected start and/or end dates into the Solr facet queries. 
fq = search_params.get('fq', u'') fq = u'{fq} +extras_start_date:[* TO {ed}] +extras_end_date:[{sd} TO *]'.format( fq=fq, sd=start_date, ed=end_date) search_params['fq'] = fq return search_params # ITemplateHelpers # inform the template of our custom vocab def get_helpers(self): return { 'regions': regions, 'datasources': datasources, 'updatefreqs': updatefreqs, } # IDatasetForm def _modify_package_schema(self, schema): # custom tags schema.update({ 'title_en': [ toolkit.get_validator('ignore_missing'), toolkit.get_converter('convert_to_extras'), ], 'region': [ toolkit.get_validator('ignore_missing'), toolkit.get_converter('convert_to_tags')('regions'), regions_autoadd, toolkit.get_converter('convert_to_tags')('regions'), ], 'datasource': [ toolkit.get_validator('ignore_missing'), toolkit.get_converter('convert_to_tags')('datasources') ], 'updatefreq': [ toolkit.get_validator('ignore_missing'), toolkit.get_converter('convert_to_tags')('updatefreqs') ], 'start_date': [ toolkit.get_validator('ignore_missing'), toolkit.get_validator('isodate'), toolkit.get_converter('convert_to_extras'), ], 'end_date': [ toolkit.get_validator('ignore_missing'), toolkit.get_validator('isodate'), toolkit.get_converter('convert_to_extras') ], 'last_update_date': [ toolkit.get_validator('ignore_missing'), toolkit.get_validator('isodate'), toolkit.get_converter('convert_to_extras') ], }) return schema def create_package_schema(self): # let's grab the default schema in our plugin schema = super(LanddbcustomizePlugin, self).create_package_schema() schema = self._modify_package_schema(schema) return schema def update_package_schema(self): # let's grab the default schema in our plugin schema = super(LanddbcustomizePlugin, self).update_package_schema() schema = self._modify_package_schema(schema) return schema def show_package_schema(self): schema = super(LanddbcustomizePlugin, self).show_package_schema() #this line prevent custom tag show in the "tags" field schema['tags']['__extras'].append( toolkit.get_converter('free_tags_only')) #add custom tags schema.update({ 'title_en': [ toolkit.get_converter('convert_from_extras'), toolkit.get_validator('ignore_missing') ], 'region': [ toolkit.get_converter('convert_from_tags')('regions'), toolkit.get_validator('ignore_missing') ], 'datasource': [ toolkit.get_converter('convert_from_tags')('datasources'), toolkit.get_validator('ignore_missing') ], 'updatefreq': [ toolkit.get_converter('convert_from_tags')('updatefreqs'), toolkit.get_validator('ignore_missing') ], 'start_date': [ toolkit.get_converter('convert_from_extras'), # toolkit.get_validator('isodate'), toolkit.get_validator('ignore_missing') ], 'end_date': [ toolkit.get_converter('convert_from_extras'), # toolkit.get_validator('isodate'), toolkit.get_validator('ignore_missing') ], 'last_update_date': [ toolkit.get_converter('convert_from_extras'), # toolkit.get_validator('isodate'), toolkit.get_validator('ignore_missing') ], }) return schema def before_view(self, dataset_dict): desired_lang_code = request.environ['CKAN_LANG'] fallback_lang_code = config.get('ckan.locale_default', 'en') translated_title = dataset_dict.get("title_" + desired_lang_code, None) if translated_title: dataset_dict["title"] = translated_title return dataset_dict def is_fallback(self): # Return True to register this plugin as the default handler for # package types not handled by any other IDatasetForm plugin. 
return True def package_types(self): # This plugin doesn't handle any special package types, it just # registers itself as the default (above). return []
def action_user_update(context, data_dict): ''' Modified from CKAN: user_update Update a user account. Normal users can only update their own user accounts. Sysadmins can update any user account. For further parameters see ``user_create()``. :param id: the name or id of the user to update :type id: string :returns: the updated user account :rtype: dictionary ''' model = context['model'] user = context['user'] session = context['session'] schema = context.get('schema') or logic.schema.default_update_user_schema() # Modify the schema by adding translation related keys add_translation_modify_schema(schema) upload = uploader.Upload('user') upload.update_data_dict(data_dict, 'image_url', 'image_upload', 'clear_upload') ignore_missing = toolkit.get_validator('ignore_missing') convert_to_extras = toolkit.get_converter('convert_to_extras') schema['job_title'] = [ignore_missing, unicode, convert_to_extras] schema['telephone_number'] = [ignore_missing, unicode, convert_to_extras] schema['main_organization'] = [ignore_missing, unicode, convert_to_extras] schema['image_url'] = [ignore_missing, unicode, convert_to_extras] schema['linkedin'] = [ignore_missing, unicode, convert_to_extras] schema['facebook'] = [ignore_missing, unicode, convert_to_extras] schema['twitter'] = [ignore_missing, unicode, convert_to_extras] schema['blog'] = [ignore_missing, to_list_json, convert_to_extras] schema['www_page'] = [ignore_missing, to_list_json, convert_to_extras] # Add the localized keys for the localized fields to the schema schema = add_languages_modify(schema, _localized_fields) not_empty = toolkit.get_validator('not_empty') schema['fullname'] = [not_empty, unicode] id = logic.get_or_bust(data_dict, 'id') user_obj = model.User.get(id) context['user_obj'] = user_obj if user_obj is None: raise NotFound('User was not found.') # If the translations are not in the data_dict, the user has not added any translations or the user has deleted all translations. # Therefore, the translations are not sent with the POST so we need to empty and update the translations here. if 'translations' not in data_dict: data_dict['translations'] = [] toolkit.check_access('user_update', context, data_dict) data, errors = validate(data_dict, schema, context) if errors: session.rollback() raise ValidationError(errors) for extra in data['extras'] if 'extras' in data else []: user_obj.extras[extra['key']] = extra['value'] user = model_save.user_dict_save(data, context) activity_dict = { 'user_id': user.id, 'object_id': user.id, 'activity_type': 'changed user' } activity_create_context = { 'model': model, 'user': user, 'defer_commit': True, 'ignore_auth': True, 'session': session } toolkit.get_action('activity_create')(activity_create_context, activity_dict) # Attempt to update drupal user _update_drupal_user(context, data_dict) # TODO: Also create an activity detail recording what exactly changed in # the user. upload.upload(uploader.get_max_image_size()) if not context.get('defer_commit'): model.repo.commit() user_data = user_dictize(user, context) for key, value in user.extras.iteritems(): if key in user_data: log.warning( "Trying to override user data with extra variable '%s'", key) continue user_data[key] = value return user_data
def basket_list(user): baskets = tk.get_action('basket_list')({}, {'user_id': user}) return baskets
def index(self, id): """ Clone the specified data set record. Arguments: id (string): URL/slug of the data set. Returns: string: JSON response. Successful clone return value: {'status': 'success', 'redirect_url': <URL of data set edit page> } Data validation error return value: {'status': 'error', 'errors': {<field1>: [<validation error message>], <field2>: [<validation error message>]} } Any other (unexpected) error: {'status': 'error', 'errorMessage': <message> } """ logger = logging.getLogger(__name__) if toolkit.request.method == 'POST': try: # TODO: handle publication pkg = toolkit.get_action('package_show')(None, dict(id=id)) cfg_adst = config.get('ckanext.openalberta.clonable_ds_types', 'opendata,publication') allowed_types = set(re.split('\s*,\s*', cfg_adst)) if pkg['type'] not in allowed_types: logger.warn( 'Requested cloning of unsupported package type (%s). Supported types: %s.', pkg['type'], cfg_adt) return { 'status': 'error', 'errorMessage': _('This package type is not allowed to be cloned.') } pkg['title'] = toolkit.request.params.getone('title') pkg['name'] = toolkit.request.params.getone('name') pkg['date_created'] = pkg['date_modified'] = datetime.now() pkg['state'] = 'draft' del pkg['id'] action = toolkit.get_action('package_create') newpkg = action(self._context, pkg) return { 'status': 'success', 'redirect_url': h.url_for(controller='package', action='edit', id=newpkg['name']) } except toolkit.ValidationError as ve: errflds = set(ve.error_dict.keys()) - {'title', 'name'} if errflds: # There are validation errors other than title and name (slug). # If this happens, it means something is wrong with the package return { 'status': 'error', 'errorMessage': _('The data set is in an invalid state. Please correct it before trying to clone.' ) } return {'status': 'error', 'errors': ve.error_dict} except: logger.exception('Error in PackageCloneController:index') return {'status': 'error', 'errorMessage': _UNEXPECTED_ERROR} else: toolkit.abort(403, _NOT_AUTHORIZED)
def embedKnowledge(data_dict): # [TODO] Get Dataset CSV Resource url from id of resource # Set visibility of loading gear loading = 'src/ckanext-liveschema_theme/ckanext/liveschema_theme/fanstatic/loading.css' loadingFile = open(loading, 'w') loadingFile.write(data_dict["loading"]) loadingFile.close() # Name of folder for intermediate results path = "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/resources/" + data_dict[ "dataset_name"] + "/" # Create Directory if not already present if not os.path.isdir(path): os.makedirs(path) # Create the dataframe from the CSV file triples = pd.read_csv(data_dict["dataset_link"], nrows=10001) if (len(triples) > 10000): # Get the final version of the package CKANpackage = toolkit.get_action('package_show')( data_dict={ "id": data_dict["dataset_name"] }) # Iterate over all the resources for resource in CKANpackage["resources"]: # Remove eventual temp resources left in case of error if resource["format"] == "temp": toolkit.get_action("resource_delete")(context={ "ignore_auth": True }, data_dict={ "id": resource["id"] }) # Remove visibility of loading gear if (os.path.isfile(loading)): os.remove(loading) # Return without continuing the execution return # Name of the training file parsedTSV = path + data_dict["dataset_name"] + ".tsv" with open(parsedTSV, "w+") as train: # Iterate over every triples row for index, row in triples.iterrows(): subj = str(row["Subject"]) pred = str(row["Predicate"]) obj = str(row["Object"]).replace('\r\n', '\n').replace('\n', ' | ') train.write(subj + "\t" + pred + "\t" + obj + "\n") # Call function with python3 to execute real embedder out = subprocess32.call( "python3 src/ckanext-liveschema_theme/ckanext/liveschema_theme/logic/knowledgeEmbedder.py " + data_dict["dataset_name"] + " !" + data_dict["options"]["strModel"] + " !" + data_dict["options"]["embedding_dim"] + " !" + data_dict["options"]["normalization_of_entities"] + " !" + data_dict["options"]["scoring_function"] + " !" + data_dict["options"]["margin_loss"] + " !" + data_dict["options"]["random_seed"] + " !" + data_dict["options"]["num_epochs"] + " !" + data_dict["options"]["learning_rate"] + " !" + data_dict["options"]["batch_size"] + " !" + data_dict["options"]["test_set_ratio"] + " !" + data_dict["options"]["filter_negative_triples"] + " !" 
+ data_dict["options"]["maximum_number_of_hpo_iters"], shell=True) # Check if execution went well if (not out): # Name of the embedding Model embeddingModel = "trained_model.pkl" # Upload trained model upload = cgi.FieldStorage() upload.filename = embeddingModel upload.file = file( os.path.normpath(os.path.expanduser(path + embeddingModel))) data = { "id": data_dict["res_id_model"], "format": "EMB", 'url': embeddingModel, #'will-be-overwritten-automatically', 'upload': upload } toolkit.get_action('resource_patch')(context={ 'ignore_auth': True }, data_dict=data) # Name of the embedding embeddingName = data_dict["dataset_name"] + "_Emb_" + data_dict[ "options"]["strModel"] + ".xlsx" # Create a the excel file for the embedding embeddings = xlsxwriter.Workbook(path + embeddingName) # Add bold cell format bold = embeddings.add_format({'bold': True}) # Create the sheet for the EntitiesToEntities relations entities = embeddings.add_worksheet("Entities To Entities") # Create the sheet for the Entities embeddings entityEmb = embeddings.add_worksheet("Entities") # Create the sheet for the RelationsToRelations relations = embeddings.add_worksheet("Relations to Relations") # Create the sheet for the Relations embeddings relationEmb = embeddings.add_worksheet("Relations") # Retrieve Entities Embedding from json file with open(path + 'entities_to_embeddings.json') as entitiesE: entitiesEJSON = json.load(entitiesE) # Get all entities names from embeddings, sorted entitiesNames = sorted(entitiesEJSON.keys(), cmp=locale.strcoll) # Header Cell of Entities is bold entities.write(0, 0, 'Entity|Entity', bold) index = 1 # Iterate over the data and write it out row by row. for entity in entitiesNames: # Save Entity Embeddings entityEmb.write(0, index - 1, entity, bold) entityEmb.write_column( 1, index - 1, list(entitiesEJSON[list(entitiesNames)[index - 1]])) # Initialise Entities to Entities relations entities.write(index, 0, entity) entities.write(0, index, entity) index += 1 # Iterate over every cell of the DataFrame for i in range(0, index - 1): for j in range(i, index - 1): # Work only with elements not on the diagonal and not already checked if (i != j): # Transform the embeddings into a numpy array arrayI = np.array(entitiesEJSON[list(entitiesNames)[i]]) arrayJ = np.array(entitiesEJSON[list(entitiesNames)[j]]) # Compute the Euclidean norm between these 2 arrays norm = np.linalg.norm(arrayI - arrayJ) # Update both the combinations with the norm entities.write(i + 1, j + 1, norm) entities.write(j + 1, i + 1, norm) else: entities.write(i + 1, i + 1, 0) # Retrieve Relations Embedding from json file with open(path + 'relations_to_embeddings.json') as relationsE: relationsEJSON = json.load(relationsE) # Get all relations names from embeddings, sorted relationsNames = sorted(relationsEJSON.keys(), cmp=locale.strcoll) # Header Cell is bold relations.write(0, 0, 'Relation|Relation', bold) index = 1 # Iterate over the data and write it out row by row. 
for relation in relationsNames: # Save Relations Embeddings relationEmb.write(0, index - 1, relation, bold) relationEmb.write_column( 1, index - 1, list(relationsEJSON[list(relationsNames)[index - 1]])) # Initialise Relations to Relations relations relations.write(index, 0, relation) relations.write(0, index, relation) index += 1 # Iterate over every cell of the DataFrame for i in range(0, index - 1): for j in range(i, index - 1): # Work only with elements not on the diagonal and not already checked if (i != j): # Transform the embeddings into a numpy array arrayI = np.array(relationsEJSON[list(relationsNames)[i]]) arrayJ = np.array(relationsEJSON[list(relationsNames)[j]]) # Compute the Euclidean norm between these 2 arrays norm = np.linalg.norm(arrayI - arrayJ) # Update both the combination with the norm relations.write(i + 1, j + 1, norm) relations.write(j + 1, i + 1, norm) else: relations.write(i + 1, i + 1, 0) # Close the embeddings Excel file embeddings.close() # Upload the csv file to LiveSchema upload = cgi.FieldStorage() upload.filename = embeddingName upload.file = file( os.path.normpath(os.path.expanduser(path + embeddingName))) data = { "id": data_dict["res_id"], "format": "EMB", 'url': embeddingName, #'will-be-overwritten-automatically', 'upload': upload } toolkit.get_action('resource_patch')(context={ 'ignore_auth': True }, data_dict=data) # Add file to DataStore using DataPusher import ckanext.datapusher.logic.action as dpaction dpaction.datapusher_submit( context={'ignore_auth': True}, data_dict={'resource_id': str(data_dict["res_id"])}) # Create a Data Explorer view of the resource toolkit.get_action('resource_view_create')( context={ 'ignore_auth': True }, data_dict={ 'resource_id': str(data_dict["res_id"]), 'title': "Data Explorer", 'view_type': "recline_view" }) else: # If there has been a problem with the execution # Remove temp resources toolkit.get_action('resource_delete')(context={ 'ignore_auth': True }, data_dict={ 'id': data_dict["res_id"] }) toolkit.get_action('resource_delete')(context={ 'ignore_auth': True }, data_dict={ 'id': data_dict["res_id_model"] }) # Get the final version of the package CKANpackage = toolkit.get_action('package_show')( data_dict={ "id": data_dict["dataset_name"] }) # Iterate over all the resources for resource in CKANpackage["resources"]: # Remove eventual temp resources left in case of error if resource["format"] == "temp" and (resource["resource_type"] == "Emb"): toolkit.get_action("resource_delete")(context={ "ignore_auth": True }, data_dict={ "id": resource["id"] }) # Remove intermediate results try: shutil.rmtree(path) except OSError as e: print("Error: %s : %s" % (path, e.strerror)) # Remove visibility of loading gear if (os.path.isfile(loading)): os.remove(loading)
def copy(self, id): context = { 'model': m, 'session': m.Session, 'user': p.toolkit.c.user or p.toolkit.c.author, 'auth_user_obj': p.toolkit.c.userobj, 'save': 'save' in t.request.params, } # check permissions try: t.check_access('package_create', context) except t.NotAuthorized: t.abort(401, t._('Unauthorized to copy this package')) data_dict = {'id': id} data = t.get_action('package_show')(None, data_dict) # change dataset title and name data['name'] = '{}-copy'.format(data['name']) while True: try: _pkg = t.get_action('package_show')(None, { 'name_or_id': data['name'] }) except l.NotFound: break else: import random data['name'] = '{}-copy-{}'.format(data['name'], random.randint(1, 100)) data['title'] = 'Copy of {0}'.format(data['title']) # remove unnecessary attributes from the dataset remove_attrs = [ 'id', 'revision_id', 'metadata_created', 'metadata_modified', 'resources', 'revision_timestamp' ] for attr in remove_attrs: if attr in data: del data[attr] if data and 'type' in data: package_type = data['type'] else: package_type = self._guess_package_type(True) data = data or clean_dict( dict_fns.unflatten( tuplize_dict( parse_params(t.request.params, ignore_keys=CACHE_PARAMETERS)))) c.resources_json = h.json.dumps(data.get('resources', [])) # convert tags if not supplied in data if data and not data.get('tag_string'): data['tag_string'] = ', '.join( h.dict_list_reduce(data.get('tags', {}), 'name')) # if we are creating from a group then this allows the group to be # set automatically data['group_id'] = t.request.params.get('group') or \ t.request.params.get('groups__0__id') # in the phased add dataset we need to know that # we have already completed stage 1 stage = ['active'] if data.get('state', '').startswith('draft'): stage = ['active', 'complete'] form_snippet = self._package_form(package_type=package_type) form_vars = { 'data': data, 'errors': {}, 'error_summary': {}, 'action': 'new', 'stage': stage, 'dataset_type': package_type, } c.errors_json = h.json.dumps({}) # override form action to use built-in package controller c.form_action = t.url_for(controller='package', action='new') self._setup_template_variables(context, {}, package_type=package_type) new_template = self._new_template(package_type) extra_vars = { 'form_vars': form_vars, 'form_snippet': form_snippet, 'dataset_type': package_type } return t.render(new_template, extra_vars=extra_vars)
def migrate_package_to_s3(self, context, package_name): """migrate_package_to_s3 - Migrates package to S3 by calling resource_update on each resource. """ # Obtain logger logger = logging.getLogger(__name__) logger.info("Starting package migration to S3 for package %s", package_name) try: pkg = toolkit.get_action('package_show')(context, { 'id': package_name }) if pkg.get('num_resources') > 0: for resource in pkg.get('resources'): # If the resource is already uploaded to S3, don't reupload if self.skip_existing_s3_upload and \ resource['url_type'] == 's3': logger.info( "Resource %s is already on S3, skipping to next " "resource.", resource.get('name', '')) continue # If filetype of resource is blacklisted, skip the # upload to S3 if not upload.is_blacklisted(resource): try: logger.info( "Attempting to migrate resource %s to S3...", resource.get('name', '')) self.change_to_s3(context, resource) logger.info( "Successfully migrated resource %s to S3.", resource.get('name', '')) except Exception as error: logger.error( "Error when migrating resource %s - %s", resource.get('name', ''), error) raise error else: logger.info( "Resource %s is blacklisted, skipping to next " "resource.", resource.get('name', '')) # Upload resource zipfile to S3 # If not blacklisted, will be done automatically as # part of resource_update. upload.upload_resource_zipfile_to_s3(context, resource) # After updating all the resources, upload package zipfile to S3 upload.upload_package_zipfile_to_s3(context, pkg) except Exception as error: logger.error("Error when migrating package %s with error %s", package_name, error) self.pkg_crashes_w_error.append({ 'pkg_name': package_name, 'error': error }) finally: # Cleanup sqlalchemy session # Required to prevent errors when uploading remaining packages model.Session.remove()
def change_to_s3(self, context, resource): """change_to_s3 - performs resource_update. The before and after update hooks upload the resource and the resource/package zipfiles to S3 """ toolkit.get_action('resource_update')(context, resource)
def create_thumbnail(package_id, resource_id=None, width=None, height=None): '''Creates a thumbnail in a dataset and returns its url :rtype: string ''' if c.user == None or len(c.user) == 0: return None if width == None: cfg_width = config.get('ckan.datasetthumbnail.thumbnail_width', 140) width = toolkit.asint(cfg_width) if height == None: cfg_height = config.get('ckan.datasetthumbnail.thumbnail_height', int(width * 1.415)) height = toolkit.asint(cfg_height) package = toolkit.get_action('package_show')(context={ 'ignore_auth': True }, data_dict={ 'id': package_id }) resource = None if resource_id != None: resource = toolkit.get_action('resource_show')(context={ 'ignore_auth': True }, data_dict={ 'id': resource_id }) if resource == None: for pkg_resource in package['resources']: if pkg_resource['format'] == 'JPEG' or pkg_resource[ 'format'] == 'PNG': resource = pkg_resource break if resource != None: headers = {} if resource['url_type'] == 'upload': if hasattr(c, 'userobj') and hasattr(c.userobj, 'apikey'): headers['Authorization'] = c.userobj.apikey try: response = requests.get(resource['url'], headers=headers, stream=True) except requests.exceptions.RequestException: # Silently fail on any request exception on the basis that it's # better to have a working page with missing thumbnails than a # broken one. return if response.status_code == 200: original_fp = StringIO( ) #create an in-memory file object in which to save the image for chunk in response.iter_content(1024): original_fp.write(chunk) original_fp.flush() image = None try: image = Image.open(original_fp) except IOError: #if an image can't be parsed from the response... return None image.thumbnail((width, height)) thumbnail_fp = StringIO() thumbnail_fp.name = 'thumbnail.png' image.save(thumbnail_fp, format='PNG') thumbnail_resource = {} thumbnail_resource['package_id'] = package['id'] thumbnail_resource['url'] = 'thumbnail.png' thumbnail_resource['url_type'] = 'upload' thumbnail_resource['format'] = 'png' thumbnail_resource['name'] = 'thumbnail.png' thumbnail_resource['upload'] = _UploadLocalFileStorage( thumbnail_fp) created_resource = toolkit.get_action('resource_create')( context={ 'ignore_auth': True }, data_dict=thumbnail_resource) thumbnail_fp.close() original_fp.close() return created_resource['url'] return None
def _add_member_to_groups(member, context): groups = tk.get_action('group_list')(context, {}) for group in groups: _add_member_to_group(member, group, context)
def _get_action(action, data_dict): return toolkit.get_action(action)(_get_context(), data_dict)
def copy_resources(self, id, data=None, errors=None, error_summary=None): context = { 'model': m, 'session': m.Session, 'user': p.toolkit.c.user or p.toolkit.c.author, 'auth_user_obj': p.toolkit.c.userobj, 'save': 'save' in t.request.params, } # check permissions try: t.check_access('package_create', context) except t.NotAuthorized: t.abort(401, t._('Unauthorized to copy this package')) # get package type if data and 'type' in data: package_type = data['type'] else: package_type = self._guess_package_type(True) resources = None if data is None: data = t.get_action('package_show')(None, {'id': id}) # generate new unused package name data['title'] = 'Copy of {0}'.format(data['title']) data['name'] = '{}-copy'.format(data['name']) while True: try: _ = t.get_action('package_show')(None, { 'name_or_id': data['name'] }) except l.NotFound: break else: import random data['name'] = '{}-copy-{}'.format(data['name'], random.randint(1, 100)) # remove unnecessary attributes from the dataset remove_attrs = [ 'id', 'revision_id', 'metadata_created', 'metadata_modified', 'revision_timestamp' ] for attr in remove_attrs: if attr in data: del data[attr] # process package resources resources = data.pop('resources', []) remove_attrs = ('id', 'revision_id', 'created', 'last_modified', 'package_id') for resource in resources: for attr in remove_attrs: if attr in resource: del resource[attr] c.resources_json = h.json.dumps(resources) form_snippet = 'package/copy_package_form.html' c.form_action = t.url_for( controller='ckanext.sokigo.controller:CopyController', action='copy_resources', id=id) if context['save'] and t.request.method == 'POST': data = clean_dict( dict_fns.unflatten( tuplize_dict( parse_params(t.request.POST, ignore_keys=CACHE_PARAMETERS)))) data['resources'] = resources # convert tags if not supplied in data if data and not data.get('tag_string'): data['tag_string'] = ', '.join( h.dict_list_reduce(data.get('tags', {}), 'name')) # if we are creating from a group then this allows the group to be # set automatically data['group_id'] = t.request.params.get('group') or \ t.request.params.get('groups__0__id') try: pkg_dict = t.get_action('package_create')(context, data) except l.NotAuthorized: t.abort(403, _('Unauthorized to read package %s') % '') except l.NotFound as e: t.abort(404, _('Dataset not found')) except dict_fns.DataError: t.abort(400, _(u'Integrity Error')) except SearchIndexError as e: try: exc_str = text_type(repr(e.args)) except Exception: # We don't like bare excepts exc_str = text_type(str(e)) t.abort(500, _(u'Unable to add package to search index.') + exc_str) except t.ValidationError as e: data['state'] = 'none' c.data = data c.errors_json = h.json.dumps(e.error_dict) form_vars = { 'data': data, 'errors': e.error_dict, 'error_summary': e.error_summary, 'action': 'new', 'stage': data['state'], 'dataset_type': package_type } extra_vars = { 'form_vars': form_vars, 'form_snippet': form_snippet, 'dataset_type': package_type } return t.render('package/copy.html', extra_vars=extra_vars) else: h.redirect_to(controller='package', action='read', id=pkg_dict['name']) c.data = data c.errors_json = h.json.dumps(errors) form_vars = { 'data': data, 'errors': errors or {}, 'error_summary': error_summary or {}, 'action': 'new', 'stage': data['state'], 'dataset_type': package_type } extra_vars = { 'form_vars': form_vars, 'form_snippet': form_snippet, 'dataset_type': package_type } return t.render('package/copy.html', extra_vars=extra_vars)
def _create_or_update_dataset(dataset): context = {} user = tk.get_action('get_site_user')({'ignore_auth': True}, {}) context.update({'user': user['name']}) harvester = SwissDCATRDFHarvester() name = harvester._gen_new_name(dataset['title']) package_plugin = lib_plugins.lookup_package_plugin('dataset') data_dict = { 'identifier': dataset['identifier'], 'include_private': True, 'include_drafts': True, } try: existing_dataset = tk.get_action('ogdch_dataset_by_identifier')( context, data_dict ) context['schema'] = package_plugin.update_package_schema() # Don't change the dataset name even if the title has changed dataset['name'] = existing_dataset['name'] dataset['id'] = existing_dataset['id'] # Don't make a dataset public if it wasn't already is_private = existing_dataset['private'] dataset['private'] = is_private map_existing_resources_to_new_dataset(dataset, existing_dataset) tk.get_action('package_update')(context, dataset) success_message = 'Updated dataset %s.' % dataset['name'] if is_private: success_message += ' The dataset visibility is private.' h.flash_success(success_message) except NotFound as e: package_schema = package_plugin.create_package_schema() context['schema'] = package_schema # We need to explicitly provide a package ID dataset['id'] = str(uuid.uuid4()) package_schema['id'] = [str] dataset['name'] = name # Create datasets as private initially dataset['private'] = True try: tk.get_action('package_create')(context, dataset) except ValidationError as e: h.flash_error( 'Error importing dataset %s: %r' % (dataset.get('name', ''), e.error_summary)) return h.flash_success( 'Created dataset %s. The dataset visibility is private.' % dataset['name']) except Exception as e: h.flash_error( 'Error importing dataset %s: %r' % (dataset.get('name', ''), e))
def _get_action(self, action, data_dict): return toolkit.get_action(action)(self._get_ctx(), data_dict)
def _get_or_create_user(self, env): #WSGI Variables #Shib-Application-ID 'default' #Shib-Authentication-Instant '2012-08-13T12:04:22.492Z' #Shib-Authentication-Method 'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport' #Shib-AuthnContext-Class 'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport' #Shib-Identity-Provider 'https://idp.example.com/idp/shibboleth' #Shib-Session-ID '_7ec5a681e6dbae627c1cefcc7cb4d56a' #Shib-Session-Index '39dafd8477850f5e0b968e3561570197f2109948c1d374a7a2b4c9a7adbf8628' #cn 'My Other Self' #givenName 'My Other Self' #mail '*****@*****.**' eppn = env.get(self.eppn, None) fullname = env.get(self.fullname, None) email = env.get(self.mail, None) if not eppn or not fullname: log.debug( 'Environ does not contain eppn or cn attributes, user not loaded.') return None user = model.Session.query(model.User).autoflush(False) \ .filter_by(name=eppn).first() # Check if user information from shibboleth has changed if user: if (user.fullname != fullname or user.email != email): log.debug('User attributes modified, updating.') user.fullname = fullname user.email = email else: # user is None: log.debug('User does not exists, creating new one.') basename = eppn username = basename suffix = 0 while not model.User.check_name_available(username): suffix += 1 username = basename + str(suffix) user = model.User(name=username, fullname=fullname, email=email, openid=eppn) model.Session.add(user) model.Session.flush() log.info('Created new user {usr}'.format(usr=fullname)) groups = env.get(self.groups, None) if groups: groups = groups.split(";") log.debug("groups: {}".format(sorted(groups))) orgs = toolkit.get_action('group_list')(data_dict={ 'all_fields': True, 'include_extras': True }) log.debug("orgs: {}".format(orgs)) add_member = toolkit.get_action('group_member_create') # Ensure there's a pylons.translator object registry = Registry() registry.prepare() registry.register(pylons.translator, MockTranslator()) for o in orgs: for e in o['extras']: if e['key'] == 'ecgroup' and e['value'] in groups: log.debug("Adding {} to {}".format(user.name, o['name'])) add_member(context={ 'user': '******' }, data_dict={ 'id': o['name'], 'username': user.name, 'role': 'member' }) model.Session.commit() model.Session.remove() return user
def _add_members_to_group(group, context): members = tk.get_action('user_list')(context, {}) for member in members: if not member['sysadmin']: _add_member_to_group(member.get('id'), group, context)
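The per-user helper _add_member_to_group is not shown in this snippet; a plausible sketch, assuming it wraps the core group_member_create action and adds each user with the 'member' capacity:

# Hypothetical sketch of the helper called above; the real implementation is not shown here.
def _add_member_to_group(user_id, group, context):
    tk.get_action('group_member_create')(context, {
        'id': group['id'],
        'username': user_id,  # group_member_create accepts a user id or name
        'role': 'member',
    })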
def generateFCA(data_dict): # [TODO] Get Dataset CSV Resource url from id of resource # Set visibility of loading gear loading = 'src/ckanext-liveschema_theme/ckanext/liveschema_theme/fanstatic/loading.css' loadingFile = open(loading, 'w') loadingFile.write(data_dict["loading"]) loadingFile.close() # Create the dataframe from the CSV file triples = pd.read_csv(data_dict["dataset_link"], nrows=10001) if (len(triples) > 10000): # Get the final version of the package CKANpackage = toolkit.get_action('package_show')( data_dict={ "id": data_dict["dataset_name"] }) # Iterate over all the resources for resource in CKANpackage["resources"]: # Remove any temp resources left over in case of error if resource["format"] == "temp": toolkit.get_action("resource_delete")(context={ "ignore_auth": True }, data_dict={ "id": resource["id"] }) # Remove visibility of loading gear if (os.path.isfile(loading)): os.remove(loading) # Return without continuing the execution return # Sort the DataFrame triples = triples.sort_values("ObjectTerm") # Create the DataFrame used to create the FCA matrix matrix = pd.DataFrame(columns=["TypeTerm", "PropertiesTokens"]) # Create the strings used to store multiple values in the same row, using " - " as separator obj = "" propTokens = "" # Dictionary used to store the triple dict_ = dict() # Iterate over every triples row for index, row in triples.iterrows(): # Check whether the triple has to be saved; if there is a predicate selection, check whether this predicate is selected bool_ = False # If there is no predicate selection then save every triple strPredicates = data_dict["strPredicates"] if (len(strPredicates.split()) == 0): bool_ = True # If there is a predicate selection then check if that predicate has to be saved else: for pred in strPredicates.split(): if (pred == str(row["PredicateTerm"]) or pred == str(row["Predicate"])): bool_ = True break # Check if the triple has to be saved if (bool_ and "http" == str(row["Subject"])[0:4] and "http" == str(row["Object"])[0:4]): # If the object value on the row has changed (first row or a new object) if (row["ObjectTerm"] != obj): # If the name of the object is not null if (len(obj)): # Add to the dictionary the latest values of the row dict_["PropertiesTokens"] = propTokens[3:] # Store the row in the matrix matrix = matrix.append(dict_, ignore_index=True) # Reset the name of the new object obj = row["ObjectTerm"] # Reset the other values of the row propTokens = "" # Store in the dictionary the fixed values of the row #dict_ = {"Type": " " + row["Object"], "TypeTerm": row["ObjectTerm"]} dict_ = {"TypeTerm": row["ObjectTerm"]} # Tokenize the SubjectTerm on capital letters, obtaining the simple words of its composition as strings separated by " " pTok = tokenTerm(row["SubjectTerm"]) # Add the info of the tokens to the string containing multiple values propTokens = propTokens + " - " + pTok # Update the last row with the latest info dict_["PropertiesTokens"] = propTokens[3:] # Store the last row in the matrix matrix = matrix.append(dict_, ignore_index=True) # Set used to avoid the creation of the same column more than once tokSet = set() # Iterate over every row of the matrix for index, row in matrix.iterrows(): # Create a list of tokens from that row's PropertiesTokens cell toks = [ x for x in row["PropertiesTokens"].replace("- ", "").split(" ") if x ] # For every token in toks for tok in toks: # Check if that token is already a column setInd = len(tokSet) tokSet.add(tok) # If the token is new, create a column of 0s for that token if (setInd < len(tokSet)): matrix[tok] = 0 # Update the value of the cell in the row of the matrix with tok as column (obtaining the token count for that row) matrix.at[index, tok] = matrix.at[index, tok] + 1 # Drop PropertiesTokens since it is no longer needed matrix.drop("PropertiesTokens", axis=1, inplace=True) # Write the FCA matrix to the csv file matrix.to_csv( os.path.normpath( os.path.expanduser( "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/" + data_dict["dataset_name"] + "_FCA.csv"))) # Upload the csv file to LiveSchema upload = cgi.FieldStorage() upload.filename = data_dict["dataset_name"] + "_FCA.csv" upload.file = open( "src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/" + data_dict["dataset_name"] + "_FCA.csv") data = { 'id': data_dict["res_id"], 'url': data_dict["dataset_name"] + "_FCA.csv", #'will-be-overwritten-automatically', 'upload': upload, "format": "FCA" } toolkit.get_action('resource_patch')(context={ 'ignore_auth': True }, data_dict=data) # Add file to DataStore using DataPusher import ckanext.datapusher.logic.action as dpaction dpaction.datapusher_submit( context={'ignore_auth': True}, data_dict={'resource_id': str(data_dict["res_id"])}) # Create a Data Explorer view of the resource toolkit.get_action('resource_view_create')(context={ 'ignore_auth': True }, data_dict={ 'resource_id': str(data_dict["res_id"]), 'title': "Data Explorer", 'view_type': "recline_view" }) # Remove the temporary csv file from the server os.remove("src/ckanext-liveschema_theme/ckanext/liveschema_theme/public/" + data_dict["dataset_name"] + "_FCA.csv") # Get the final version of the package CKANpackage = toolkit.get_action('package_show')( data_dict={ "id": data_dict["dataset_name"] }) # Iterate over all the resources for resource in CKANpackage["resources"]: # Remove any temp resources left over in case of error if resource["format"] == "temp" and (resource["resource_type"] == "FCA"): toolkit.get_action("resource_delete")(context={ "ignore_auth": True }, data_dict={ "id": resource["id"] }) # Remove visibility of loading gear if (os.path.isfile(loading)): os.remove(loading)
def dashboard(self): context = {'user': c.user, 'auth_user_obj': c.userobj} c.harvest_source_infos = tk.get_action( 'get_harvest_source_infos_for_user')(context, {}) # noqa return render('harvester_dashboard/list.html')
def resource_read(self, id, resource_id): context = { 'model': model, 'session': model.Session, 'user': c.user, 'auth_user_obj': c.userobj, 'for_view': True } if context['user']: '''If user is logged in ''' viewer_id = context['auth_user_obj'].id save_view_details(viewer_id, resource_id, context) try: c.package = toolkit.get_action('package_show')(context, {'id': id}) except (NotFound, NotAuthorized): abort(404, _('Dataset not found')) for resource in c.package.get('resources', []): if resource['id'] == resource_id: c.resource = resource break if not c.resource: abort(404, _('Resource not found')) # required for nav menu c.pkg = context['package'] c.pkg_dict = c.package dataset_type = c.pkg.type or 'dataset' # get package license info license_id = c.package.get('license_id') try: c.package['isopen'] = model.Package.\ get_license_register()[license_id].isopen() except KeyError: c.package['isopen'] = False # TODO: find a nicer way of doing this c.datastore_api = '%s/api/action' % \ config.get('ckan.site_url', '').rstrip('/') c.resource['can_be_previewed'] = self._resource_preview({ 'resource': c.resource, 'package': c.package }) resource_views = toolkit.get_action('resource_view_list')( context, { 'id': resource_id }) c.resource['has_views'] = len(resource_views) > 0 current_resource_view = None view_id = request.GET.get('view_id') if c.resource['can_be_previewed'] and not view_id: current_resource_view = None elif c.resource['has_views']: if view_id: current_resource_view = [ rv for rv in resource_views if rv['id'] == view_id ] if len(current_resource_view) == 1: current_resource_view = current_resource_view[0] else: abort(404, _('Resource view not found')) else: current_resource_view = resource_views[0] vars = { 'resource_views': resource_views, 'current_resource_view': current_resource_view, 'dataset_type': dataset_type } template = self._resource_template(dataset_type) return render(template, extra_vars=vars)
def edit(self, id=None, data=None, errors=None, error_summary=None): context = { 'save': 'save' in request.params, 'schema': self._edit_form_to_db_schema(), 'model': model, 'session': model.Session, 'user': c.user, 'auth_user_obj': c.userobj } if id is None: if c.userobj: id = c.userobj.id else: abort(400, _('No user specified')) data_dict = {'id': id} try: check_access('user_update', context, data_dict) except NotAuthorized: abort(403, _('Unauthorized to edit a user.')) if (context['save']) and not data: return self._save_edit(id, context) try: old_data = toolkit.get_action('user_show')(context, data_dict) schema = self._db_to_edit_form_schema() if schema: old_data, errors = \ dictization_functions.validate(old_data, schema, context) c.display_name = old_data.get('display_name') c.user_name = old_data.get('name') extra_data = user_analytics_present(context) if extra_data: old_data['occupation'] = extra_data.occupation data = data or old_data except NotAuthorized: abort(403, _('Unauthorized to edit user %s') % '') except NotFound: abort(404, _('User not found')) user_obj = context.get('user_obj') if not (authz.is_sysadmin(c.user) or c.user == user_obj.name): abort(403, _('User %s not authorized to edit %s') % (str(c.user), id)) errors = errors or {} vars = { 'data': data, 'errors': errors, 'error_summary': error_summary, 'origin': origin, 'countries': allCountries, 'occupations': occupations } self._setup_template_variables( { 'model': model, 'session': model.Session, 'user': c.user }, data_dict) c.is_myself = True c.show_email_notifications = asbool( config.get('ckan.activity_streams_email_notifications')) c.form = render(self.edit_user_form, extra_vars=vars) return render('user/edit.html')
def _get_action_request_data(self, api_action): function = toolkit.get_action(api_action) side_effect_free = getattr(function, 'side_effect_free', False) request_data = self._get_request_data(try_url_params=side_effect_free) return request_data
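The side_effect_free attribute checked above is normally set by CKAN's toolkit decorator on read-only actions, which is what allows their parameters to be read from the URL. An illustrative, hypothetical action showing how the flag ends up on the function:

# Illustrative only: the action name and body are made up; the decorator is
# the standard CKAN toolkit one that marks the function as side-effect free.
from ckan.plugins import toolkit


@toolkit.side_effect_free
def example_status_show(context, data_dict):
    '''A read-only action, safe to call via GET with URL parameters.'''
    return {'ok': True}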
def get_homepage_datasets(self, *args, **kwargs): psearch = toolkit.get_action("package_search") psearch_ret = psearch(data_dict={"sort": "timestamp desc", "rows": 5}) results = psearch_ret['results'] return results
class InitEurovoc(CkanCommand): """ """ summary = __doc__.split('\n')[0] usage = __doc__ max_args = 0 min_args = 0 def __init__(self, name): super(InitEurovoc, self).__init__(name) def command(self): """ """ self._load_config() log = logging.getLogger(__name__) import ckan.model as model log.info('ADDING GROUPS (Eurovoc Domains)') root = ET.parse('ckanext/eurovoc/eurovoc_xml/dom_en.xml').getroot() for record in root.iter('RECORD'): id = record.find('DOMAINE_ID').text title = record.find('LIBELLE').text.title() name = slugify(title).lower() desc = 'Eurovoc Domain: ' + id + ' ' + title grp_dict = {'id': id, 'title': title, 'name': name, 'type': 'group', 'extras': [{'key': 'Eurovoc Domain', 'value': title}, {'key': 'Eurovoc Domain ID', 'value': id}]} log.info('Creating group: ' + id + ' - ' + title) context = {'user': '******', 'model': model, 'session': model.Session} try: toolkit.get_action('group_create')(context, grp_dict) except: pass log.info('ADDING VOCABULARY THESAURUS') context = {'user': '******', 'model': model, 'session': model.Session} voc_dict = {'name': 'eurovoc_thesaurus'} try: voc = toolkit.get_action('vocabulary_create')(context, voc_dict) except ValidationError, e: voc = toolkit.get_action('vocabulary_show')(context, {'id': 'eurovoc_thesaurus'}) thesroot = ET.parse('ckanext/eurovoc/eurovoc_xml/thes_en.xml').getroot() for record in thesroot.iter('RECORD'): id = record.find('THESAURUS_ID').text title = record.find('LIBELLE').text.title() name = slugify(title) name_new = re.sub(r'(:S|\'[sS])', r's', title) name_new = re.sub(u'[^a-zA-Z0-9]', r' ', name_new) name_new = re.sub(u'\s+', r'-', name_new) log.info('Creating tag: ' + name_new) context = {'user': '******', 'model': model, 'session': model.Session} del_dict = {'id': name, 'vocabulary_id': voc['id']} try: toolkit.get_action('tag_delete')(context, del_dict) except: pass del_dict['id'] = name_new try: toolkit.get_action('tag_delete')(context, del_dict) except: pass tag_dict = {'name': name_new, 'vocabulary_id': voc['id']} context = {'user': '******', 'model': model, 'session': model.Session} toolkit.get_action('tag_create')(context, tag_dict)
def dataontosearch_dataset_search(context, data_dict): ''' Perform a semantic search using DataOntoSearch. The parameters and returned JSON is designed to be compatible with the regular search (package_search) as far as possible, though only the q parameter is supported. Some additional information from DataOntoSearch is also available. :param q: the query to use when searching :type q: string :rtype: dictionary with 'concepts' that matched the query, a 'count' of results and 'results' with a list of datasets that matched. For each dataset, their similarity 'score' and similar 'concepts' are available in addition to the usual information given in package_show. For each concept, their RDF IRI is available as 'uri', human-readable label as 'label' and similarity score as 'similarity' ''' toolkit.check_access(u'dataontosearch_dataset_search', context, data_dict) query = toolkit.get_or_bust(data_dict, u'q') parameters = { u'q': query, u'd': 0, } if get_use_autotag(): parameters[u'a'] = 1 r = make_search_get_request(u'/search', parameters) r.raise_for_status() data = r.json() results = data[u'results'] query_concepts = data[u'concepts'] processed_results = [] for result in results: # Extract the ID of this dataset dataset_id = result[u'uri'].split(u'/')[-1] # Fetch information about this dataset try: dataset_info = toolkit.get_action(u'package_show')( None, { u'id': dataset_id, }) except toolkit.ObjectNotFound: # Perhaps not part of this CKAN? This should generally not happen, # and can indicate some trouble with configurations in # DataOntoSearch or changed ID or name in CKAN logger.warning( u'Skipping dataset %(uri)s returned from DataOntoSearch, not ' u'found in CKAN', {u'uri': result[u'uri']}, exc_info=True) continue except toolkit.NotAuthorized: # This may be a private dataset or something, so don't show it # TODO: Does CKAN reveal that the dataset exists, but is private? logger.debug( u'Skipping dataset %(uri)s since user is not authorized to see ' u'it', {u'uri': result[u'uri']}, exc_info=True) continue # Enrich with information from DataOntoSearch's result extra_info = { u'concepts': result[u'concepts'], u'score': result[u'score'], } dataset_info.update(extra_info) # Processed! processed_results.append(dataset_info) return { u'count': len(processed_results), u'results': processed_results, u'concepts': query_concepts, # Include dummy data for keys present in package_search u'sort': u'', u'facets': {}, u'search_facets': {} }
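A minimal usage sketch for the action above; the query text and variable names are illustrative, and it assumes the DataOntoSearch search service is reachable and configured:

# Illustrative call; 'water quality' is just an example query.
result = toolkit.get_action(u'dataontosearch_dataset_search')(
    None, {u'q': u'water quality'})
print(result[u'count'])
for dataset in result[u'results']:
    # Each entry is a package_show dict enriched with 'score' and 'concepts'.
    print(dataset[u'name'], dataset[u'score'])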
def publish(self, doi, pkg=None, context={}, *args, **kwargs): update_doi = kwargs.get('update', False) # dataset data package_id = pkg['id'] url = config.get('ckan.site_url', '') + '/dataset/' + pkg.get( 'name', pkg['id']) if self.url_prefix: url = self.url_prefix + pkg.get('name', pkg['id']) if update_doi: log.debug("*** Updating id = {0}, url = {1}".format( package_id, url)) # check published data match published_ids = self.get_doi_identifiers(doi) if published_ids and package_id not in published_ids and pkg.get( 'name') not in published_ids: return None, 'Dataset id ({0}, {1}) do not match published ids: [{2}]'.format( package_id, pkg.get('name'), ', '.join(published_ids)) else: log.debug("Publishing id = {0}, url = {1}".format(package_id, url)) # get converted package metadata_format = 'datacite' try: converted_package = toolkit.get_action('package_export')( context, { 'id': package_id, 'format': metadata_format }) except toolkit.ObjectNotFound: return None, 'Dataset not found' xml = converted_package.replace('\n', '').replace('\t', '') # Validate try: converted_record = XMLRecord.from_record( Record( MetadataFormats().get_metadata_formats(metadata_format)[0], xml)) validation_result = converted_record.validate() log.debug("Validation result: {0}".format(validation_result)) except Exception as e: log.error("Converted Validation FAILED, exception: {0}".format(e)) traceback.print_exc() validation_result = False if not validation_result: return None, 'Dataset XML validation failed' # encode 64 xml_bytes = xml if isinstance(xml, str): xml_bytes = xml.encode('utf-8') xml_encoded = base64.b64encode(xml_bytes) # prepare JSON headers = {"Content-Type": "application/vnd.api+json"} auth = HTTPBasicAuth(self.account_name, self.account_password) data = collections.OrderedDict() data['id'] = doi data['type'] = 'dois' data['attributes'] = collections.OrderedDict() # TODO check for update if this state is correct if update_doi: data['attributes']['event'] = "" else: data['attributes']['event'] = "publish" data['attributes']['doi'] = doi data['attributes']['url'] = url data['attributes']['xml'] = xml_encoded.decode() args = {'data': data} args_json = json.dumps(args) # log.debug(args_json) datacite_url_endpoint = self.datacite_url if update_doi: datacite_url_endpoint = self.datacite_url + '/' + doi log.debug( " REST request send to URL: {0}".format(datacite_url_endpoint)) if update_doi: r = requests.put(datacite_url_endpoint, headers=headers, auth=auth, data=args_json) else: r = requests.post(datacite_url_endpoint, headers=headers, auth=auth, data=args_json) # print(r.status_code) # print(r.json()) if r.status_code == 201 or r.status_code == 200: published_doi = r.json().get('data').get('id') return published_doi, None else: if update_doi: return None, 'Error updating to DataCite: HTTP Code: {0}, error: {1}'.format( r.status_code, r.json()) else: return None, 'Error publishing to DataCite: HTTP Code: {0}, error: {1}'.format( r.status_code, r.json())
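For reference, the JSON:API body assembled above serializes to roughly the following; the DOI, URL and XML values are placeholders, and 'event' is left empty instead of 'publish' when an existing DOI is being updated:

# Example payload sent to DataCite by the code above (values are placeholders):
# {
#   "data": {
#     "id": "10.xxxx/example-doi",
#     "type": "dois",
#     "attributes": {
#       "event": "publish",
#       "doi": "10.xxxx/example-doi",
#       "url": "https://example.org/dataset/example-dataset",
#       "xml": "<base64-encoded DataCite metadata XML>"
#     }
#   }
# }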
def identify(self): '''Identify which user (if any) is logged in via simple SSO header. If a logged in user is found, set toolkit.c.user to be their user name. ''' logger = logging.getLogger(__name__) shib_partyid = request.headers.get('partyId') logger.debug(request.headers) if not getattr(c, u'user', None): c.user = None if shib_partyid is None: logger.debug("ESAS Identity not Found in HEADER") if shib_partyid is not None and c.user is None: logger.debug("ESAS Identity Found in Header") shib_email = request.headers.get('email') gives = text_to_id(request.headers.get('Legalgivennames')) fams = text_to_id(request.headers.get('Legalfamilyname')) nonumgives = re.sub('[0-9]+', '', gives) nonumfam = re.sub('[0-9]+', '', fams) shib_username = (alphabet_position(gives) + '_' + alphabet_position(fams)).lower() shib_fullname = nonumgives + ' ' + nonumfam logger.debug("partyId = \"{0}\"".format(shib_partyid)) logger.debug("email = \"{0}\"".format(shib_email)) logger.debug("username = \"{0}\"".format(shib_username)) logger.debug("fullname = \"{0}\"".format(shib_fullname)) check_user = get_user_by_userid(shib_partyid) # ESAS user is logged in and exists in CKAN if c.user and c.user == check_user['name']: logger.debug( "User logged in already username = \"{0}\"".format( check_user['name'])) # Check if the ESAS email for the user has changed. # If it has changed then update the user email to match. # CKAN is not the system of record for email; # it is changed as needed to match the ESAS header. current_email = get_email_by_userid(shib_partyid) if shib_email != current_email: logger.info("ESAS: A user account has changed email.") check_user = toolkit.get_action('user_update')( context={ 'ignore_auth': True, 'user': '******' }, data_dict={ 'id': shib_partyid, 'email': shib_email }) elif c.user and c.user != check_user['name']: # User already logged in and ESAS header does not match logger.info( "ESAS: User already logged in to CKAN - \"{0}\"".format( c.user)) logger.info("ESAS: Username in header - \"{0}\"".format( check_user['name'])) logger.info("ESAS: User being set to username in ESAS header.") elif check_user is not None and c.user is None: # User not logged in and ESAS header exists c.user = check_user['name'] else: # A user with this username doesn't yet exist in CKAN # - so create one. logger.info("ESAS: user not found. Creating new CKAN user.") check_user = toolkit.get_action('user_create')( context={ 'ignore_auth': False, 'user': '******' }, data_dict={ 'email': shib_email, 'id': shib_partyid, 'name': shib_username, 'fullname': shib_fullname, 'password': generate_password() }) logger.debug("username = \"{0}\"".format(check_user['name']))
def _check_access(self, id): context = self._context() pkg = tk.get_action('package_show')(context, {'id': id}) if not check_journal_role(pkg, 'admin') and not h.check_access('sysadmin'): tk.abort(401, 'Unauthorized to manage DOIs')
def register(self, id, template): """ register at da|ra """ self._check_access(id) context = self._context() if params()['test'] or params()['test_register']: doi_key = 'dara_DOI_Test' a = {201: ('dara_registered_test', 'Dataset registered (Test)'), 200: ('dara_updated_test', 'Dataset updated (Test)')} else: doi_key = 'dara_DOI' a = {201: ('dara_registered', 'Dataset registered'), 200: ('dara_updated', 'Dataset updated')} def store(): d = doi.pkg_doi(c.pkg_dict) c.pkg_dict.update({doi_key: d}) date = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.now()) k = get_in([dara, 0], a) c.pkg_dict[k] = date tk.get_action('package_update')(context, c.pkg_dict) def response(): if dara in a.iterkeys(): store() h.flash_success(get_in([dara, 1], a)) else: h.flash_error("ERROR! Sorry, dataset has not been registered or\ updated. Please contact your friendly sysadmin. ({})\ ".format(dara)) tk.redirect_to('dara_doi', id=id) def register_resources(): def reg(resource): resource_id = resource['id'] c.resource = tk.get_action('resource_show')(context, {'id': resource_id}) xml = self.xml(id, 'package/resource.xml') dara = darapi(auth(), xml, test=params()['test'], register=params()['register']) if dara in a.iterkeys(): c.resource[doi_key] = doi.res_doi(c.resource) tk.get_action('resource_update')(context, c.resource) else: h.flash_error("ERROR! Resource {} could not be registered ({}).\ Dataset has not been registered".format(resource_id, dara)) tk.redirect_to('dara_doi', id=id) c.pkg_dict = tk.get_action('package_show')(context, {'id': id}) resources = filter(lambda res: res['id'] in tk.request.params, c.pkg_dict['resources']) map(reg, resources) # first register resources register_resources() # register package. we must first get the pkg with the updated resources to # get their DOIs/URLs c.pkg_dict = tk.get_action('package_show')(context, {'id': id}) dara = darapi(auth(), self.xml(id, template), test=params()['test'], register=params()['register']) response()
def _get_action(cls, action_name): return tk.get_action(action_name)
def get_harvester_count(): return len(get_action('harvest_source_list')({}, {'all': True}))