def get_catalog_description(self):
    """Return the catalog description, using the Redis cache when enabled.

    On a cache hit the pickled catalog is returned directly.  On a miss
    (or when caching is disabled) the description is (re)loaded from the
    triple store into ``self``, the cache is refreshed, and ``self`` is
    returned.

    :return: the catalog description object, or None on failure
    """
    try:
        active_cache = config.get('ckan.cache.active', 'false')
        catalog = None  # type: CatalogDcatApOp
        if active_cache == 'true':
            # get the ds from cache
            catalog_string = redis_cache.get_from_cache(
                self.cache_id, pool=redis_cache.MISC_POOL)
            if catalog_string:
                catalog = pickle.loads(catalog_string)
                log.info('Load catalog from cache: {0}'.format(
                    self.cache_id))
        if active_cache != 'true' or catalog is None:
            self.get_description_from_ts()
            redis_cache.flush_all_from_db(redis_cache.MISC_POOL)
            redis_cache.set_value_in_cache(self.cache_id,
                                           pickle.dumps(self), 864000,
                                           pool=redis_cache.MISC_POOL)
            # BUG FIX: previously returned None on a cache miss even though
            # the description was successfully loaded into ``self``.
            catalog = self
        return catalog
    except BaseException:
        log.error(
            "[Catalog]. Get Catalog description failed for {0}".format(
                self.catalog_uri))
        # BUG FIX: traceback.print_exc(e) passes the exception as the
        # ``limit`` argument; format_exc() puts the real trace in the log.
        log.error(traceback.format_exc())
        return None
def catalogue_list(context, data_dict):
    """Return the list of catalogues, served from the Redis cache when possible.

    :param context:
    :param data_dict:
    :return: the (possibly cached) list of catalogues
    """
    use_cache = config.get('ckan.cache.active', 'false') == 'true'
    result = None

    if use_cache:
        # Try the cached pickle first.
        cached = redis_cache.get_from_cache('catalogue_list',
                                            pool=redis_cache.MISC_POOL)
        if cached:
            result = pickle.loads(cached)
            log.info('Load catalogue list from cache')

    if not result:
        # Cache miss (or caching disabled): query the triple store.
        helper = CatalogDcatApOp('TMP')  # type: CatalogDcatApOp
        result = helper.get_list_catalogs()
        if use_cache:
            redis_cache.set_value_no_ttl_in_cache('catalogue_list',
                                                  pickle.dumps(result),
                                                  pool=redis_cache.MISC_POOL)
    return result
def test_ts_and_cache_equality(self):
    """Dataset loaded from the triple store must equal its cached copy."""
    name_or_id = uri_prefix_test + "dgt-translation-memory-V1-2"
    cache_dataset = None  # type: DatasetDcatApOp
    ts_dataset = DatasetDcatApOp(name_or_id)  # type: DatasetDcatApOp
    ts_dataset.get_description_from_ts()
    pickled = redis_cache.get_from_cache(name_or_id,
                                         pool=redis_cache.DATASET_POOL)
    if pickled:
        cache_dataset = pickle.loads(pickled)
    assert_equal(ts_dataset.schema.__dict__, cache_dataset.schema.__dict__)
def controlled_vocabulary_group_from_db(self, property_member):
    """
    validate the specif case in which the controlled vocabulary is ion the db.
    :param str property_member: name of the schema attribute holding the group URIs
    :return: ValidationTypeResult|None (None on unexpected failure)
    """

    def get_name_from_uri(uri):
        """
        get the local name of the uri based on the template */localname
        :param str uri:
        :return str:
        """
        try:
            # name_from_uri = uri.rsplit("/", 1)[1].lower()
            # In this version one uses the group name as uri
            name_from_uri = uri.lower()
            return name_from_uri
        except BaseException:
            log.error(
                "Validation. get_name_from uri failed. [uri: {0}]".format(
                    uri))
            return None

    try:
        list_uris = getattr(self.schema_to_validate, property_member,
                            None)  # type: dict[str, SchemaGeneric]
        validation_result = ValidationTypeResult.success
        if list_uris:
            for group in list_uris.values():  # :type group: SchemaGeneric
                key = "controlled_vocabulary_group_from_db_" + group.uri
                cached_validation_result = redis_cache.get_from_cache(
                    key, pool=redis_cache.MISC_POOL)
                if cached_validation_result is None:
                    # Not cached yet: look the group up in the DB and
                    # remember the outcome as a string flag.
                    name_from_uri = get_name_from_uri(group.uri)
                    group_in_db = model.Group.get(name_from_uri)
                    cached_validation_result = 'True' if group_in_db else 'False'
                    redis_cache.set_value_in_cache(
                        key, cached_validation_result,
                        pool=redis_cache.MISC_POOL)
                if cached_validation_result == 'False':
                    # BUG FIX: previously broke out of the loop *before*
                    # updating validation_result, so a missing group could
                    # still report success.
                    validation_result = ValidationTypeResult.error
                    break
                validation_result = ValidationTypeResult.success
        return validation_result
    except BaseException:
        # BUG FIX: the format string used {0} twice, so the uri was never
        # shown; also log the actual traceback instead of print_exc(e).
        log.error(
            "Validation. controlled_vocabulary_group_from_db failed. [Property {0}]. [uri: {1}] "
            .format(property_member, self.schema_to_validate.uri))
        log.error(traceback.format_exc())
        return None
def __get_from_cache(self, mdr_uri, concept):
    """Return the cached concept for ``mdr_uri`` when caching is on, else ``concept``."""
    if config.get('ckan.cache.active', 'false') != 'true':
        return concept
    cache_key = MD5(mdr_uri).hexdigest()
    pickled = redis_cache.get_from_cache(cache_key,
                                         pool=redis_cache.VOCABULARY_POOL)
    # Fall back to the caller-supplied concept on a cache miss.
    return pickle.loads(pickled) if pickled else concept
def get_vocabulary_description(self):
    """Return the vocabulary description, using the Redis cache when enabled.

    On a cache hit the pickled wrapper is returned; on a miss the
    description is loaded from the triple store into ``self``, cached for
    ten days, and ``self`` is returned.

    :return: the ConceptSchemaSkosWrapper description, or None on failure
    """
    try:
        active_cache = config.get('ckan.cache.active', 'false')
        vocabulary = None  # type: ConceptSchemaSkosWrapper
        if active_cache == 'true':
            # get the ds from cache
            vocabulary_string = redis_cache.get_from_cache(self.cache_id)
            if vocabulary_string:
                # BUG FIX: the unpickled value was previously assigned to a
                # stray ``catalog`` variable, so the cached copy was never
                # actually used or returned.
                vocabulary = pickle.loads(vocabulary_string)
                log.info('Load ConceptSchemaSkosWrapper from cache: {0}'.format(self.cache_id))
        if active_cache != 'true' or vocabulary is None:
            self.get_description_from_ts()
            redis_cache.set_value_in_cache(self.cache_id, pickle.dumps(self), 864000)
            # BUG FIX: previously returned None on a cache miss even though
            # the description was loaded into ``self``.
            vocabulary = self
        return vocabulary
    except BaseException:
        log.error("[Vocabulary]. Get ConceptSchemaSkosWrapper description failed for {0}".format(self.uri))
        # Explicitly return None on failure (previously fell off the end).
        return None
def organization_list(context, data_dict):
    '''Return a list of the names of the site's organization.

    :param order_by: the field to sort the list by, must be ``'name'`` or
      ``'packages'`` (optional, default: ``'name'``) Deprecated use sort.
    :type order_by: string
    :param sort: sorting of the search results.  Optional.  Default:
        "name asc" string of field name and sort-order. The allowed fields are
        'name' and 'packages'
    :type sort: string
    :param groups: a list of names of the groups to return, if given only
        groups whose names are in this list will be returned (optional)
    :type groups: list of strings
    :param all_fields: return full group dictionaries instead of  just names
        (optional, default: ``False``)
    :type all_fields: boolean

    :rtype: list of strings
    '''
    # The whole (serialised) request dict is the cache key, so different
    # sort/field combinations are cached independently.
    key = json.dumps(data_dict)
    active_cache = config.get('ckan.cache.active', 'false')
    organizations = None
    if active_cache == 'true':
        organization_str = redis_cache.get_from_cache(
            key, pool=redis_cache.MISC_POOL)
        if organization_str:
            organizations = pickle.loads(organization_str)
        else:
            # Cache miss: rebuild from the core action and cache the pickle
            # without a TTL.
            # NOTE(review): when caching is disabled entirely,
            # ``organizations`` stays None and the code below would fail --
            # confirm this endpoint always runs with ckan.cache.active=true.
            organizations = logic.action.get.organization_list(context, data_dict)
            redis_cache.set_value_no_ttl_in_cache(key,pickle.dumps(organizations),
                                                  pool=redis_cache.MISC_POOL)
    if context.get('for_view', False):
        # in the web UI only list publishers with published datasets
        # depending upon the context, group['packages'] may be either a
        # count of the packages, or the actual list of packages
        if organizations and isinstance(organizations[0]['packages'], int):
            organizations = [g for g in organizations if g['packages'] > 0]
        else:
            organizations = [g for g in organizations if len(g['packages']) > 0]
    # An explicit 'sort' means the caller's ordering is already applied;
    # otherwise apply the default organization ordering.
    return organizations if 'sort' in data_dict.keys() else sorted(
        organizations, key=sort_organization)
def __init__(self):
    """Build the controlled-vocabulary mapping, preferring the Redis cache.

    On a cache hit the pickled mapping's state is copied into this
    instance; otherwise every vocabulary is fetched from the triple store
    and the freshly built mapping is cached for ten days.
    """
    start = time.time()
    try:
        active_cache = config.get('ckan.cache.active', 'true')
        cv = None
        if active_cache == 'true':
            # get the ConrolledVocabulary from cache
            controlled_voc_string = redis_cache.get_from_cache(
                "ControlledVocabulary_Mapping",
                pool=redis_cache.VOCABULARY_POOL)
            if controlled_voc_string:
                cv = pickle.loads(controlled_voc_string)
                log.info('Load controlled vocabulary mapping from cache')
                # Copy the cached instance's attributes onto this one.
                self.__dict__.update(cv.__dict__)
        if active_cache != 'true' or cv is None:
            # Cache miss (or caching disabled): fetch every vocabulary
            # from the triple store.
            self.controlled_file_types = retrieve_all_file_types()
            self.controlled_file_types_with_context = retrieve_all_file_types_with_context()
            self.controlled_frequencies = retrieve_all_frequencies()
            self.controlled_status = retrieve_all_datasets_status()
            self.controlled_languages = retrieve_all_languages()
            self.controlled_distribution_types = retrieve_all_distribution_types()
            self.controlled_documentation_types = retrieve_all_documentation_types()
            self.controlled_publishers = retrieve_all_publishers()
            self.controlled_country = retrieve_all_country_types()
            self.controlled_time_period = retrieve_all_time_periods()
            self.controlled_notation_types = retrieve_all_notation_types()
            self.controlled_license = retrieve_all_license()
            self.controlled_eurovoc_concepts = retrieve_all_aurovoc_concept()
            self.controlled_data_themes = retrieve_all_data_themes()
            self.controlled_access_rights = retrieve_all_access_rights()
            self.controlled_adms = retrieve_all_adms()
            self.controlled_datasets_types = retrieve_all_dataset_types()
            redis_cache.set_value_in_cache("ControlledVocabulary_Mapping",
                                           pickle.dumps(self), 864000,
                                           pool=redis_cache.VOCABULARY_POOL)
    except BaseException:
        log.error("[ControlledVocabulary]. Build ControlledVocabulary mapping failed")
        # BUG FIX: traceback.print_exc(e) passes the exception as the
        # ``limit`` argument and prints to stderr; log the real trace.
        log.error(traceback.format_exc())
    duration = time.time() - start
    log.info("[Duration] get Controlled vocabulary mapping took {0}".format(duration))
def publisher_export(self):
    """Build (and cache) the per-publisher openness export rows.

    NOTE(review): the computed rows are only written to the cache; nothing
    is returned/rendered here -- presumably a later step reads the cache.
    Confirm against the original controller.
    """
    publisher = request.params.get('publisher', '')
    locale = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
    cache_key = 'global_export:{0}:{1}'.format(locale, publisher)
    dict_string = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)
    result = None
    if dict_string:
        start_time = time.time()
        result = pickle.loads(dict_string)
        duration = time.time() - start_time
        log.info("Loading json took {0}".format(duration))
    else:
        # SECURITY FIX: ``publisher`` comes straight from the request, so it
        # must be bound as a parameter instead of %-interpolated into SQL.
        query_string = '''select d.id, d.name, d.title, p.name, p.title,
            r.id, r.name, r.resource_type, r.url, r.format, r.mimetype, r.last_modified, r.size, tem.score, tem.reason
            from package d
            join resource_group rg on d.id = rg.package_id
            join resource r on rg.id = r.resource_group_id
            join "group" p on d.owner_org = p.id
            join (select sr.entity_id, sr.value as score, ds.value as reason
                  from task_status sr
                  join task_status ds on sr.entity_id = ds.entity_id
                  where sr.key = 'openness_score' and ds.key = 'openness_score_reason') tem on tem.entity_id = r.id
            where p.name = :publisher
            order by p.name, d.name '''
        rows = model.Session.execute(query_string, {'publisher': publisher})
        try:
            # NOTE: 'resource_last_modidied' typo is kept on purpose --
            # consumers of the cached export already rely on this key.
            data_list = [{'dataset_id': (row[0] or '').encode('utf8'),
                          'dataset_name': (row[1] or '').encode('utf8'),
                          'dataset_title': (row[2] or '').encode('utf8'),
                          'publisher_name': (row[3] or '').encode('utf8'),
                          'publisher_title': (row[4] or '').encode('utf8'),
                          'resource_id': (row[5] or '').encode('utf8'),
                          'resource_name': (row[6] or '').encode('utf8'),
                          'resource_resource_type': (row[7] or '').encode('utf8'),
                          'resource_url': (row[8] or '').encode('utf8'),
                          'resource_format': (row[9] or '').encode('utf8'),
                          'resource_mimetype': (row[10] or '').encode('utf8'),
                          'resource_last_modidied': str(row[11]) or '',
                          'resource_size': row[12] or '',
                          'openness_score': row[13] or '',
                          'openness_reason': row[14] or '',} for row in rows]
            # BUG FIX: the cache module itself was passed as the key; store
            # under ``cache_key`` so the export can actually be found again.
            cache.set_value_in_cache(cache_key, pickle.dumps(data_list),
                                     pool=cache.MISC_POOL)
        except Exception as e:
            # BUG FIX: errors were silently swallowed with log.info('halt');
            # record what actually went wrong.
            log.error("publisher_export failed for {0}: {1}".format(publisher, e))
def dataset_list(self, id):
    """Render the openness score page for one publisher's datasets."""
    package_type = 'dataset'
    try:
        context = {'model': model, 'user': c.user or c.author,
                   'auth_user_obj': c.userobj}
        check_access('openness', context)
    except NotAuthorized:
        abort(401, _('Not authorized to see this page'))

    publisher = model.Group.get(id)
    locale = tk.request.environ['CKAN_LANG'] or config.get(
        'ckan.locale_default', 'en')
    cache_key = 'global_openness:{0}:{1}'.format(locale, id)
    cached_report = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)

    if cached_report:
        # Serve the pre-computed report from the cache.
        load_start = time.time()
        result = pickle.loads(cached_report)
        log.info("Loading json took {0}".format(time.time() - load_start))
    else:
        result = self._openness_sores_for_dataset(id)
        result['owner_org'] = publisher.id
        result['owner_org_name'] = publisher.name
        result['json'] = json.dumps(result['totals'])
        #cache.set_value_in_cache(cache_key,pickle.dumps(result), pool=redis_cache.MISC_POOL)

    c.pkg_dict = result
    c.publishers = logic.get_action('organization_list')(context,
                                                         {'all_fields': True})
    self._setup_template_variables(context, {}, package_type=package_type)
    # c.form = base.render(self._package_form(package_type='upload_package'), extra_vars=vars)
    return base.render('openness/dataset_list.html')
def load_dataset(self, dataset_uri):
    '''
    Load dataset from Triple store or the cache

    :return: type: DatasetDcatApOp
    '''
    cached = redis_cache.get_from_cache(dataset_uri,
                                        pool=redis_cache.MISC_POOL)
    dataset = pickle.loads(cached) if cached else None  # type: DatasetDcatApOp
    if dataset is not None:
        log.info(
            '[Dataset] [LOAD from cache] [URI:<{0}>]'.format(dataset_uri))
    if not dataset or not dataset.schema:
        # Cache miss or empty schema: load from the triple store, falling
        # back to the private graph when the public one has no description.
        dataset = DatasetDcatApOp(dataset_uri)
        if not dataset.get_description_from_ts():
            dataset.set_state_as_private()
            dataset.get_description_from_ts()
            log.info("[Dataset] [Load from private] [URI:<{0}>]".format(
                dataset_uri))
    return dataset
def _package_search(data_dict):
    """
    Helper method that wraps the package_search action.

         * unless overridden, sorts results by modified_date date
         * unless overridden, sets a default item limit

    Returns a ``(total_count, result_list)`` tuple for the requested page,
    serving both from the Redis cache when possible.
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }
    # Default (and normalised) sort order is newest-modified first.
    if 'sort' not in data_dict or not data_dict['sort'] or data_dict[
            'sort'] == 'modified_date desc':
        data_dict['sort'] = 'modified_date desc'
    try:
        page = int(request.params.get('page', 1))
    except ValueError, e:
        abort(400, ('"page" parameter must be an integer'))
    # Group results by organisation buckets so the UI can show per-source
    # counts (everything-else / estat / comp / grow).
    data_dict['group'] = 'true'
    data_dict['group.query'] = [
        '-organization:estat AND -organization:comp AND -organization:grow',
        'organization:estat', 'organization:comp', 'organization:grow'
    ]
    data_dict['group.format'] = 'simple'
    # Fetch effectively all rows; paging is done locally below.
    data_dict['rows'] = 2147483646
    start = (page - 1) * ITEMS_LIMIT
    # Cache key includes the full query dict and the page offset.
    result_cache_key = '{0}:{1}'.format(json.dumps(data_dict), start)
    count_cache_key = '{0}:{1}:{2}'.format(json.dumps(data_dict), start,
                                           'count')
    result_list_string = cache.get_from_cache(result_cache_key,
                                              pool=cache.MISC_POOL)
    count_cache = cache.get_from_cache(count_cache_key, pool=cache.MISC_POOL)
    if result_list_string and count_cache:
        return int(count_cache), pickle.loads(result_list_string)
    else:
        try:
            query = get_action('custom_package_search')(context,
                                                        data_dict.copy())
        except SearchError as se:
            log.warning(se)
            import traceback
            # NOTE(review): traceback.print_exc(se) treats ``se`` as the
            # ``limit`` argument and returns None -- confirm intended.
            log.error(traceback.print_exc(se))
            abort(400, ('Search query could not be parsed'))
        # Flatten the grouped docs back into one list before paging.
        cached_result = []
        for name, group in query['groups'].iteritems():
            cached_result += group['doclist']['docs']
        #result_list = customsearch.check_solr_result(context, cached_result[start:], ITEMS_LIMIT)
        result_list = []
        # Materialise one page of packages; silently drop stale search hits
        # that no longer resolve.
        for item in cached_result[start:start + ITEMS_LIMIT]:
            try:
                get_action('package_show')(context, {'id': item.get('id')})
                result_list.append(context.get('package'))
            except NotFound as e:
                log.warning('Package show: {0} Dataset not found'.format(
                    item.get('id')))
        cache.set_value_in_cache(result_cache_key, pickle.dumps(result_list),
                                 pool=cache.MISC_POOL)
        cache.set_value_in_cache(count_cache_key, query['count'],
                                 pool=cache.MISC_POOL)
        return query['count'], result_list
log.warning('Package show: {0} Dataset not found'.format( item.get('id'))) cache.set_value_in_cache(result_cache_key, pickle.dumps(result_list), pool=cache.MISC_POOL) cache.set_value_in_cache(count_cache_key, query['count'], pool=cache.MISC_POOL) return query['count'], result_list result_cache_key = '{0}'.format(json.dumps(data_dict)) count_cache_key = '{0}:{1}'.format(json.dumps(data_dict), 'count') result_list_string = cache.get_from_cache(result_cache_key, pool=cache.MISC_POOL) count_cache = cache.get_from_cache(count_cache_key, pool=cache.MISC_POOL) if result_list_string and count_cache: return int(count_cache), pickle.loads(result_list_string) if 'rows' not in data_dict or not data_dict['rows']: data_dict['rows'] = ITEMS_LIMIT # package_search action modifies the data_dict, so keep our copy intact. try: context['internal'] = True query = __common_search(context, data_dict.copy()) cache.set_value_in_cache(result_cache_key, pickle.dumps(query['results']),
def legacy_package_show(context, data_dict):
    '''Return the metadata of a dataset (package) and its resources.

    This overrides core package_show to deal with DCAT-AP data

    :param str uri: the uri of the dataset

    :rtype: dictionary
    '''
    import ckanext.ecportal.model.mapping.old_model_mapper as mapper
    # Feature switch: the legacy API can be turned off via configuration.
    if config.get('ckan.ecodp.backward_compatibility',
                  'true') in 'false, False':
        raise logic.NotFound('Function not available')
    uri_prefix = '{0}/{1}'.format(config.get('ckan.ecodp.uri_prefix'),
                                  'dataset')
    # Resolve the dataset URI from, in order of precedence: an explicit
    # objectUri, a short id (prefixed), or a raw uri.
    dataset_uri_ckan2odp = data_dict.get("objectUri")
    if dataset_uri_ckan2odp:
        name_or_id = dataset_uri_ckan2odp
    elif data_dict.get("id"):
        name_or_id = '{0}/{1}'.format(uri_prefix, data_dict.get("id"))
    else:
        name_or_id = data_dict.get(
            "uri"
        )  # or 'http://data.europa.eu/999/dataset/dgt-translation-memory-V1-2'
    if not name_or_id:
        raise DataError('No id provided')
    active_cache = config.get('ckan.cache.active', 'false')
    dataset = None  # type: DatasetDcatApOp
    if active_cache == 'true':
        # get the ds from cache
        dataset_string = redis_cache.get_from_cache(
            name_or_id, pool=redis_cache.DATASET_POOL)
        if dataset_string:
            dataset = pickle.loads(dataset_string)
            log.info('Load dataset from cache: {0}'.format(name_or_id))
            # dataset = DatasetDcatApOp(name_or_id,dataset_json)
    if not dataset or not dataset.schema:
        dataset = DatasetDcatApOp(name_or_id)
        # todo optimize the code
        # Public load failed: for an authenticated (or auth-exempt) caller,
        # retry against the private graph.
        if not dataset.get_description_from_ts() and (
                context.get('auth_user_obj', None)
                or context.get('ignore_auth', False) == True):
            dataset.set_state_as_private()
            #private dataset should not be cached
            active_cache = 'false'
            if not dataset.get_description_from_ts():
                raise logic.NotFound(_('ecodp.dcat.dataset.not_found'))
        log.info('Load dataset from ts: {0}'.format(name_or_id))
        if active_cache == 'true':
            redis_cache.set_value_no_ttl_in_cache(name_or_id,
                                                  pickle.dumps(dataset))
    if not dataset.schema:
        raise logic.NotFound('ecodp.dcat.dataset.not_found')
    context['package'] = dataset
    permission = _check_access('package_show', context, data_dict)
    if not permission:
        raise logic.NotAuthorized()
    # Map the DCAT-AP dataset back onto the legacy package schema.
    package_dict = mapper.package_show_schema(dataset)
    return package_dict
def get_resources_of_datasets(self, publisher='none',
                              graph_name='<dcatapop-public>'):
    '''Return the resources of all datasets in ``graph_name``.

    When ``publisher`` is given, returns a list of row dicts filtered to
    that publisher; otherwise returns a dict keyed by resource local name.
    Results are cached for a day per publisher.

    :param publisher: lower-cased publisher local name, or 'none' for all
    :param graph_name:
    :return: type list (filtered) | dict (all) | None on failure
    '''
    result = []
    try:
        str_res = redis_cache.get_from_cache(
            'get_resources_of_datasets:{0}'.format(publisher),
            pool=redis_cache.MISC_POOL)
        if str_res:
            result = pickle.loads(str_res)
            return result
    except Exception as e:
        import traceback
        log.error('{0}'.format(e))
        # BUG FIX: traceback.print_exc() returns None (it prints to
        # stderr); format_exc() puts the trace in the log.
        log.error(traceback.format_exc())
    try:
        # Distributions and documentation pages (foaf:page) are unioned so
        # both kinds of resource are exported.
        select_query = " select ?resource ?dataset_name ?dataset_title ?publisher from {0} where {{ " \
                       " {{ " \
                       " ?ds a <http://www.w3.org/ns/dcat#Dataset> ." \
                       " ?ds <http://www.w3.org/ns/dcat#distribution> ?resource ." \
                       " ?ds <http://purl.org/dc/terms/title> ?dataset_title ." \
                       " ?ds <http://purl.org/dc/terms/publisher> ?publisher ." \
                       " ?ds <http://data.europa.eu/88u/ontology/dcatapop#ckanName> ?dataset_name" \
                       " filter (lang(?dataset_title) in ('en',''))" \
                       " }}" \
                       " union " \
                       " {{" \
                       " ?ds a <http://www.w3.org/ns/dcat#Dataset> ." \
                       " ?ds foaf:page ?resource ." \
                       " ?ds <http://purl.org/dc/terms/title> ?dataset_title ." \
                       " ?ds <http://data.europa.eu/88u/ontology/dcatapop#ckanName> ?dataset_name ." \
                       " ?ds <http://purl.org/dc/terms/publisher> ?publisher ." \
                       " filter (lang(?dataset_title) in ('en',''))" \
                       " }} }}".format(graph_name)
        result = self.execute_select_query_auth(select_query)
        list_final = None
        if publisher != 'none':
            list_final = []
            for res in result:
                if res.get('publisher').get('value').split(
                        '/')[-1].lower() == publisher:
                    list_final.append({
                        'dataset_name': res['dataset_name']['value'],
                        'dataset_title': res['dataset_title']['value'],
                        'resource': res['resource']['value'],
                        'publisher': res['publisher']['value']
                    })
            #list_final = (x for x in result if x.get('publisher').split('/')[-1].lower() == publisher)
        else:
            list_final = {}
            for res in result:
                list_final[res['resource']['value'].split('/')[-1]] = {
                    'dataset_name': res['dataset_name']['value'],
                    'dataset_title': res['dataset_title']['value'],
                    'resource': res['resource']['value'],
                    'publisher': res['publisher']['value']
                }
            # list_final.append(
            #     {'dataset_name': res['dataset_name']['value'], 'dataset_title': res['dataset_title']['value'],
            #      'resource': res['resource']['value'], 'publisher': res['publisher']['value']})
        redis_cache.set_value_in_cache(
            'get_resources_of_datasets:{0}'.format(publisher),
            pickle.dumps(list_final), 86400, pool=redis_cache.MISC_POOL)
        return list_final
    except BaseException as e:
        import traceback
        log.error('{0}'.format(e))
        # BUG FIX: same print_exc -> format_exc correction as above.
        log.error(traceback.format_exc())
        return None
def catalogue_show(context, data_dict):
    """Return a catalogue description by id or uri, using the cache when on.

    :param context: set ``context['internal']`` to get the raw
        CatalogDcatApOp object instead of a dict
    :param data_dict: must carry ``id`` or ``uri``
    :return: CatalogDcatApOp (internal) or a dict with the dictized schema
    """
    start = time.time()
    uri_prefix = '{0}/{1}'.format(config.get('ckan.ecodp.uri_prefix'),
                                  'catalogue')
    if data_dict.get("id"):
        name_or_id = '{0}/{1}'.format(uri_prefix, data_dict.get("id"))
    else:
        name_or_id = data_dict.get(
            "uri"
        )  # or 'http://data.europa.eu/999/dataset/dgt-translation-memory-V1-2'
    if not name_or_id:
        raise DataError('No id provided')
    active_cache = config.get('ckan.cache.active', 'false')
    catalogue = None  # type: CatalogDcatApOp
    if active_cache == 'true':
        # get the ds from cache
        catalogue_string = redis_cache.get_from_cache(
            name_or_id, pool=redis_cache.DATASET_POOL)
        if catalogue_string:
            catalogue = pickle.loads(catalogue_string)
            log.info('Load catalogue from cache: {0}'.format(name_or_id))
            # dataset = DatasetDcatApOp(name_or_id,dataset_json)
    if not catalogue or not catalogue.schema:
        catalogue = CatalogDcatApOp(name_or_id)
        # todo optimize the code
        loaded = catalogue.get_description_from_ts()
        # Public load failed: for an authenticated (or auth-exempt) caller,
        # retry against the private graph.
        if not loaded and (context.get('auth_user_obj', None)
                           or context.get('ignore_auth', False) == True):
            catalogue.set_state_as_private()
            #private dataset should not be cached
            active_cache = 'false'
            loaded = catalogue.get_description_from_ts()
        if not loaded:
            raise logic.NotFound('Package show: catalogue {0} {1}'.format(
                name_or_id, _('ecodp.dcat.dataset.not_found')))
        if active_cache == 'true':
            redis_cache.set_value_no_ttl_in_cache(
                name_or_id, pickle.dumps(catalogue),
                pool=redis_cache.DATASET_POOL)
    if not catalogue.schema:
        raise logic.NotFound('Package show: dataset {0} {1}'.format(
            name_or_id, _('ecodp.dcat.dataset.not_found')))
    context['catalogue'] = catalogue
    # Internal callers get the live object; external callers get a dict.
    if context.get('internal'):
        log.info('Catalogue show internal took {0} sec'.format(time.time() -
                                                               start))
        return catalogue
    package_dict = {}  #{'rdf': catalogue.get_dataset_as_rdfxml()}
    package_dict['catalogue'] = catalogue.schema.schema_dictaze()
    log.info('Catalogue show took {0} sec'.format(time.time() - start))
    return package_dict
def history(self, id): import time package_type = self._get_package_type(id.split('@')[0]) start = time.time() try: context = { 'model': model, 'session': model.Session, 'user': c.user or c.author, 'auth_user_obj': c.userobj } data_dict = {'id': id} c.pkg_dict = get_action('package_show')(context, data_dict) pkg_revisions = None pkg_revisions_str = redis_cache.get_from_cache( 'rss_revisions:{0}'.format(id), pool=redis_cache.MISC_POOL) if pkg_revisions_str: pkg_revisions = pickle.loads(pkg_revisions_str) else: pkg_revisions = get_action('package_revision_list')(context, data_dict) redis_cache.set_value_no_ttl_in_cache( 'rss_revisions:{0}'.format(id), pickle.dumps(pkg_revisions), pool=redis_cache.MISC_POOL) c.pkg_revisions = pkg_revisions c.package = context.get('package') dataset = context.get('package') #type: DatasetDcatApOp list_revisions = None revision_list_str = redis_cache.get_from_cache( 'rss_history:{0}'.format(id), pool=redis_cache.MISC_POOL) if revision_list_str: list_revisions = pickle.loads(revision_list_str) else: list_revisions = dataset.get_list_revisions_ordred(20) redis_cache.set_value_no_ttl_in_cache( 'rss_history:{0}'.format(id), pickle.dumps(list_revisions), pool=redis_cache.MISC_POOL) log.info( '****************** rss1 /history took {0} sec**********************' .format((time.time() - start))) start = time.time() if 'diff' in request.params or 'selected1' in request.params: try: params = { 'id': request.params.getone('pkg_name'), 'diff': request.params.getone('selected1'), 'oldid': request.params.getone('selected2'), } except KeyError, e: if 'pkg_name' in dict(request.params): id = request.params.getone('pkg_name') c.error = \ _('Select two revisions before doing the comparison.') else: params['diff_entity'] = 'package' data_dict['list_revisions'] = list_revisions diff_report = diff_datasets(context, data_dict, params.get('diff'), params.get('oldid')) c.diff = diff_report.get('diff_dict') c.diff.sort() c.revision_to = {} c.revision_from = {} 
c.revision_to = { 'timestamp': diff_report['revision_to_time'], 'id': params.get('oldid') } c.revision_from = { 'timestamp': diff_report['revision_from_time'], 'id': params.get('diff') } c.name = diff_report.get('title', '') c.diff_entity = 'package' c.dataset_id = dataset.dataset_uri.split('/')[-1] return base.render('revision/diff.html') # h.redirect_to(controller='revision', action='diff', **params) c.pkg = context['package'] log.info( '****************** rss2 /history took {0} sec**********************' .format((time.time() - start)))
def get_skos_hierarchy(context, max_element=None):
    """Build the publisher (SKOS) hierarchy with per-publisher package counts.

    Results are cached as JSON, keyed per user, because counts include
    private packages only for publishers the user may read.

    :param context: action context; ``user`` selects the cache entry
    :param max_element: currently unused, kept for backward compatibility
    :return: OrderedDict mapping top-level publisher URI to
             ``{'children': [(uri, count), ...], 'total': n}``; ``{}`` on error
    """
    result = OrderedDict()
    ts_query_helper = TripleStoreQueryHelpers()
    user = context.get('user', None)
    cache_key = 'skos_hierarchy_{0}'.format(user) if user else 'skos_hierarchy'
    dict_string = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)
    if dict_string:
        start_time = time.time()
        result = json.loads(dict_string)
        duration = time.time() - start_time
        log.info("[DB] Loading json took {0}".format(duration))
    else:
        try:
            # Count packages once for the public graph and once including
            # the private graph; pick per publisher below based on auth.
            graph_list = ['dcatapop-public']
            start1 = time.time()
            package_count_public = ts_query_helper.get_package_count_by_publisher(graph_list)
            log.info('1st package count query took {0}s'.format(time.time() - start1))
            graph_list.append('dcatapop-private')
            start2 = time.time()
            packag_count_all = ts_query_helper.get_package_count_by_publisher(graph_list)
            log.info('2nd package count query took {0}s'.format(time.time() - start2))
            factory = ControlledVocabularyFactory()
            publ_mdr = factory.get_controlled_vocabulary_util(
                ControlledVocabularyFactory.CORPORATE_BODY)  # type: CorporateBodiesUtil
            publ_hierarchie = publ_mdr.get_publisher_hierarchy()
            for top_level, children in publ_hierarchie.items():
                sum_count = 0
                pub_id = top_level.split('/')[-1].lower()
                # Only users with read access see private package counts.
                if auth.has_user_permission_for_group_or_org(pub_id, user, 'read'):
                    sum_count += packag_count_all.get(top_level) or 0
                else:
                    sum_count += package_count_public.get(top_level) or 0
                interim = {'children': []}
                for child in children:
                    count = 0
                    pub_id = child.split('/')[-1].lower()
                    if auth.has_user_permission_for_group_or_org(pub_id, user, 'read'):
                        count += packag_count_all.get(child) or 0
                    else:
                        count += package_count_public.get(child) or 0
                    if count > 0:
                        interim['children'].append((child, count))
                        sum_count += count
                interim['total'] = sum_count
                result[top_level] = interim
            cache.set_value_in_cache(cache_key, json.dumps(result),
                                     pool=cache.MISC_POOL)
        except Exception as e:
            log.error('Error during querying the groups in get_skos_hierarchy: %s' % e)
            import traceback
            # BUG FIX: print_exc() returns None; log the formatted trace.
            log.error(traceback.format_exc())
            return {}
    # BUG FIX: the hierarchy was built (and cached) but never returned,
    # so callers always received None.
    return result
def publisher_list(self):
    """Render the global openness-score report aggregated per publisher."""
    package_type = 'dataset'
    try:
        context = {'model': model, 'user': c.user or c.author,
                   'auth_user_obj': c.userobj}
        check_access('openness', context)
    except NotAuthorized:
        abort(401, _('Not authorized to see this page'))
    c.pkg_dict = {}
    c.publishers = logic.get_action('organization_list')(context,
                                                         {'all_fields': True})
    report = {}
    locale = tk.request.environ['CKAN_LANG'] or config.get(
        'ckan.locale_default', 'en')
    cache_key = 'global_openness:{0}'.format(locale)
    g_start_time = time.time()
    dict_string = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)
    if dict_string:
        # Serve the pre-computed report from the cache.
        start_time = time.time()
        report = pickle.loads(dict_string)
        duration = time.time() - start_time
        log.info("Loading json took {0}".format(duration))
    else:
        osp_start = time.time()
        rows = self._openness_sores_for_publisher()
        osp_duration = time.time() - osp_start
        log.info("_openness_sores_for_publisher took {0}".format(osp_duration))
        ps_start = time.time()
        publishers = model.Group.all(group_type='organization')
        # Index publishers by name for O(1) lookup in the loop below.
        dict_pub = {}
        for publisher in publishers:
            dict_pub[publisher.name] = publisher
        # Bucket each dataset's score into its publisher's histogram row.
        for ds_name, obj in rows.iteritems():
            if not obj.get('owner_org'):
                continue
            publisher = dict_pub.get(
                obj.get('owner_org').split('/')[-1].lower(), model.Group())
            #publisher = next((pub for pub in publishers if pub.name == model.Group.get(obj.get('owner_org').split('/')[-1].lower())),None)
            publ_report = report.get(publisher.name or publisher.id,
                                     {'publisher_name': publisher.title,
                                      'publisher_id': publisher.name or publisher.id,
                                      'zero': 0, 'one': 0, 'two': 0,
                                      'three': 0, 'four': 0, 'five': 0,
                                      'sum': 0, 'avg': 0})
            column_key = self._set_dataset_score(obj['sum_value'])
            publ_report[column_key] += 1
            publ_report = self.calculate_sum(publ_report)
            publ_report = self.calculate_avg(publ_report)
            report[publisher.name] = publ_report
        #cache.set_value_in_cache(cache_key,pickle.dumps(report), pool=redis_cache.MISC_POOL)
        ps_duration = time.time() - ps_start
        log.info("publisher list 1st loop took {0}".format(ps_duration))
    # Grand totals across all publishers, per score bucket.
    totals = {'zero': 0, 'one': 0, 'two': 0, 'three': 0, 'four': 0,
              'five': 0}
    for publ, value in report.iteritems():
        totals['zero'] = value['zero'] + totals['zero']
        totals['one'] = value['one'] + totals['one']
        totals['two'] = value['two'] + totals['two']
        totals['three'] = value['three'] + totals['three']
        totals['four'] = value['four'] + totals['four']
        totals['five'] = value['five'] + totals['five']
    common_formats = self.get_format_summary_list()
    result = {}
    result['table'] = report
    result['totals'] = totals
    result['json'] = json.dumps(totals)
    result['common_formats'] = json.dumps(common_formats)
    c.pkg_dict = result
    self._setup_template_variables(context, {}, package_type=package_type)
    g_duration = time.time() - g_start_time
    log.info("Global Loading took {0}".format(g_duration))
    # c.form = base.render(self._package_form(package_type='upload_package'), extra_vars=vars)
    return base.render('openness/publisher_list.html')
class ECODPGroupController(GroupController):
    """Group/organization controller overriding CKAN's GroupController
    with ODP-specific behavior: DCAT catalogs on the index page,
    eurovoc-domain handling, and redis-backed search caching."""

    def index(self):
        """Render the group index page with themes and DCAT catalogs."""
        start = time.time()
        group_type = self._guess_group_type()
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'for_view': True,
                   'with_private': False}
        q = c.q = request.params.get('q', '')
        data_dict = {'all_fields': True, 'q': q}
        sort_by = c.sort_by_selected = request.params.get('sort')
        if sort_by:
            data_dict['sort'] = sort_by
        try:
            self._check_access('site_read', context)
        except NotAuthorized:
            abort(401, _('Not authorized to see this page'))
        # pass user info to context as needed to view private datasets of
        # orgs correctly
        if c.userobj:
            context['user_id'] = c.userobj.id
            context['user_is_admin'] = c.userobj.sysadmin
        results = self._action('group_list')(context, data_dict)
        # Only the first `amount_group_displayed` groups are shown;
        # the template uses `hasmoregroups` to offer a "show more" link.
        c.amount_group_displayed = amount_group_displayed
        c.groups = results[:amount_group_displayed]
        c.hasmoregroups = len(results) > amount_group_displayed
        c.themes = self._action('theme_list')(context, {})
        c.catalogs = CatalogDcatApOp.get_ui_list_catalogs(config.get('ckan.locale_default', 'en'))
        c.amount_catalog_displayed = amount_catalog_displayed
        #c.page = h.Page(
        #    collection=results,
        #    page=request.params.get('page', 1),
        #    url=h.pager_url,
        #    items_per_page=21
        #)

        # Render caching is currently disabled (decorator commented out);
        # the wrapper is kept so it can be re-enabled without reshaping code.
        # @cache.cache('cached_render', expire=3600)
        def cached_render(user, languge, group_type):
            _render = base.render(group_type)
            return _render

        start_render = time.time()
        _render = cached_render(context.get('user'), language, self._index_template(group_type))
        duration_render= time.time() - start_render
        log.info("Duration index render. {0}".format(duration_render))
        duration = time.time() - start
        log.info("Duration index. {0}".format(duration))
        return _render

    def read(self, id, limit=20):
        """Render a single group page, including its dataset search results.

        :param id: group name or id (may carry a '@<lang>' suffix)
        :param limit: datasets per page
        """
        start = time.time()
        # Persist the user's boolean-search mode choice in the session.
        if request.GET.get('ext_boolean') in ['all', 'any', 'exact']:
            base.session['ext_boolean'] = request.GET['ext_boolean']
            base.session.save()
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        group_type = self._get_group_type(id.split('@')[0])
        if (group_type != self.group_type) and (group_type != "eurovoc_domain"):
            abort(404, _('Incorrect group type'))
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True}
        # Do not query for the group datasets when dictizing, as they will
        # be ignored and get requested on the controller anyway
        data_dict = {'id': id, 'include_datasets': False}
        # unicode format (decoded from utf8)
        q = c.q = request.params.get('q', '')
        try:
            stratqs = time.time()
            # Eurovoc domain groups were retired; treat them as not found.
            if 'eurovoc_domain' in data_dict.get('id','') :
                raise NotFound('EurovocDomains are not available any more')
            result = self._action('group_show_read')(context, data_dict)
            c.group_dict = result.get('group_dict', None)
            context["group"] = result.get('group')
            durationgs = time.time() - stratqs
            log.info("Duration group show read. {0}".format(durationgs))
            c.group = context['group']
        except NotFound:
            abort(404, _('Group not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read group %s') % id)
        # Populates c.page / c.facets etc. as a side effect.
        self._read(id, limit)
        start_render = time.time()

        # Render caching disabled here as well (see index()).
        # @cache.cache('render_cached_read', expire=3600)
        def render_cached(user, id, language, group_type):
            _render = base.render(self._read_template(c.group_dict['type']))
            return _render

        _render = render_cached(context.get('user'), id, language, c.group_dict['type'])
        duration_render = time.time() - start_render
        # _render = base.render(self._read_template(c.group_dict['type']))
        duration = time.time() - start
        log.info("Duration read_group. {0}".format(duration))
        return _render

    def _read(self, id, limit=20):
        import time
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        start = time.time()
        ''' This is common code used by both read and bulk_process'''
        group_type = self._get_group_type(id.split('@')[0])
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True, 'extras_as_string': True}
        q = c.q = request.params.get('q', '')
        # Search within group
        # Rewrite the free-text query according to the chosen boolean mode:
        # 'all' -> AND of terms, 'any' -> OR of terms, default -> exact phrase.
        if q != u'':
            qTab = q.split(' ')
            checkbox = request.params.get('ext_boolean')
            if checkbox == 'all':
                q = '(' + ' AND '.join(qTab) + ')'
            elif checkbox == 'any':
                q = '(' + ' OR '.join(qTab) + ')'
            else: #checkbox == 'exact'
                q = '"' + q + '"'
        c.description_formatted = h.render_markdown(c.group_dict.get('description'))
        context['return_query'] = True
        # c.group_admins is used by CKAN's legacy (Genshi) templates only,
        # if we drop support for those then we can delete this line.
        c.group_admins = new_authz.get_group_or_org_admin_ids(c.group.id)
        try:
            page = int(request.params.get('page', 1))
        except ValueError, e:
            abort(400, ('"page" parameter must be an integer'))
        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']
        # Map legacy 'eurovoc_domains' filter params onto 'groups'.
        new_params_nopage = []
        for key, value in params_nopage:
            if key == 'eurovoc_domains':
                new_params_nopage.append(('groups', value))
            else:
                new_params_nopage.append((key,value))
        params_nopage = new_params_nopage
        #sort_by = request.params.get('sort', 'name asc')
        sort_by = request.params.get('sort', None)

        @cache.cache('search_url', expire=3600)
        def search_url(params):
            # Build the pager URL for the current controller/action.
            if group_type == 'organization':
                if c.action == 'bulk_process':
                    url = self._url_for(controller='organization',
                                        action='bulk_process',
                                        id=id)
                else:
                    url = self._url_for(controller='organization',
                                        action='read',
                                        id=id)
            else:
                url = self._url_for(controller='group', action='read', id=id)
            params = [(k, v.encode('utf-8') if isinstance(v, basestring)
                       else str(v)) for k, v in params]
            return url + u'?' + urlencode(params)

        @cache.cache('drill_down_url', expire=3600)
        def drill_down_url(**by):
            return h.add_url_param(alternative_url=None,
                                   controller='group', action='read',
                                   extras=dict(id=c.group_dict.get('name')),
                                   new_params=by)

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            # Organizations key their URL on 'id', groups on 'name'.
            if c.group_dict.get('is_organization'):
                return h.remove_url_param(key, value=value, replace=replace,
                                          controller='organization',
                                          action='read',
                                          extras=dict(id=c.group_dict.get('id')))
            else:
                return h.remove_url_param(key, value=value, replace=replace,
                                          controller='group', action='read',
                                          extras=dict(id=c.group_dict.get('name')))

        c.remove_field = remove_field

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params)

        try:
            c.fields = []
            search_extras = {}
            # Fold extra request params into the solr query; 'ext_'-prefixed
            # params are passed through as search extras instead.
            for (param, value) in request.params.items():
                if not param in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        param = 'eurovoc_domains' if (param == 'eurovoc_domains') else param;
                        q += ' %s:"%s"' % (param, value)
                    else:
                        search_extras[param] = value
            fq = ''
            # Restrict results to this org/group.
            if c.group_dict.get('is_organization'):
                q += ' owner_org:"%s"' % c.group_dict.get('id')
            elif c.group_dict.get('name') not in q:
                q += ' groups:"%s"' % c.group_dict.get('name')
            fq = fq + ' capacity:"public"'
            user_member_of_orgs = [org['id'] for org in
                                   h.organizations_available('read')]
            # Members of the org may also see private datasets.
            if (c.group and c.group.id in user_member_of_orgs):
                fq = ''
                context['ignore_capacity_check'] = True
            facets = OrderedDict()
            default_facet_titles = {'organization': _('Organizations'),
                                    'groups': _('Groups'),
                                    'tags': _('Tags'),
                                    'res_format': _('Formats'),
                                    'license_id': _('Licenses')}
            for facet in g.facets:
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet
            # Facet titles
            for plugin in plugins.PluginImplementations(plugins.IFacets):
                if self.group_type == 'organization':
                    facets = plugin.organization_facets(
                        facets, self.group_type, None)
                else:
                    facets = plugin.group_facets(
                        facets, self.group_type, None)
            if 'capacity' in facets and (self.group_type != 'organization'
                                         or not user_member_of_orgs):
                del facets['capacity']
            c.facet_titles = facets
            data_dict = {
                'q': q,
                'fq': fq,
                'facet.field': facets.keys(),
                'rows': limit,
                'sort': sort_by,
                'start': (page - 1) * limit,
                'extras': search_extras,
                'defType': 'edismax'
            }
            active_cache = config.get('ckan.cache.active', 'false')
            # 'schema' is not picklable/cache-safe, so strip it for the call.
            context_ = dict((k, v) for (k, v) in context.items()
                            if k != 'schema')
            has_result = False
            dict_as_pickle = None
            # The pickled search request itself is used as the cache key.
            if active_cache == 'true':
                dict_as_pickle = pickle.dumps(data_dict)
                query_json = redis.get_from_cache(dict_as_pickle, pool=redis.MISC_POOL)
                if query_json:
                    query = pickle.loads(query_json)
                    has_result = True
            if has_result == False:
                query = get_action('package_search')(context_, data_dict)
                if active_cache == 'true':
                    redis.set_value_in_cache(dict_as_pickle, pickle.dumps(query), pool=redis.MISC_POOL)
            c.search_url_params = urlencode(_encode_params(params_nopage))
            c.page = page_util.Page(
                collection=query['results'],
                page=page,
                url=pager_url,
                item_count=query['count'],
                items_per_page=limit
            )
            c.group_dict['package_count'] = query['count']
            c.facets = query['facets']
            #maintain.deprecate_context_item('facets', 'Use `c.search_facets` instead.')
            c.search_facets = query['search_facets']
            c.search_facets_limits = {}
            for facet in c.facets.keys():
                limit = int(request.params.get('_%s_limit' % facet,
                                               g.facets_default_number))
                c.search_facets_limits[facet] = limit
            c.page.items = query['results']
            c.sort_by_selected = sort_by
            duration = time.time() - start
            log.info("Duration _read. {0}".format(duration))
        except search.SearchError, se:
            # Search failed: render the page with an empty result set.
            log.error('Group search error: %r', se.args)
            c.query_error = True
            c.facets = {}
            c.page = page_util.Page(collection=[])
            c.search_url_params = ''
def package_show(context, data_dict):
    '''Return the metadata of a dataset (package) and its resources.

    This overrides core package_show to deal with DCAT-AP data.

    The dataset is resolved from (in priority order) data_dict['objectUri'],
    data_dict['id'] (prefixed into a dataset URI), or data_dict['uri'].
    When 'ckan.cache.active' is 'true' the pickled dataset is read from and
    written to the redis DATASET_POOL; otherwise it is loaded from the
    triplestore. Private datasets are only served to authenticated callers
    (or when 'ignore_auth' is set) and are never cached.

    :param dict context: CKAN action context; 'internal' short-circuits the
        dict conversion and returns the DatasetDcatApOp object itself.
    :param dict data_dict: must carry one of 'objectUri', 'id' or 'uri';
        optional 'output_format' in ('standard', 'rdf', 'json').
    :rtype: dictionary (or DatasetDcatApOp when context['internal'])
    :raises DataError: when no identifier is provided
    :raises logic.NotFound: when the dataset is in neither known graph or
        cannot be loaded
    :raises logic.NotAuthorized: when access is denied
    '''
    start = time.time()
    uri_prefix = '{0}/{1}'.format(config.get('ckan.ecodp.uri_prefix'), 'dataset')
    dataset_uri_ckan2odp = data_dict.get("objectUri")
    if dataset_uri_ckan2odp:
        name_or_id = dataset_uri_ckan2odp
    elif data_dict.get("id"):
        name_or_id = '{0}/{1}'.format(uri_prefix, data_dict.get("id"))
    else:
        name_or_id = data_dict.get(
            "uri"
        )  # or 'http://data.europa.eu/999/dataset/dgt-translation-memory-V1-2'
    if not name_or_id:
        raise DataError('No id provided')

    active_cache = config.get('ckan.cache.active', 'false')
    dataset = None  # type: DatasetDcatApOp
    if active_cache == 'true':
        # get the ds from cache
        dataset_string = redis_cache.get_from_cache(
            name_or_id, pool=redis_cache.DATASET_POOL)
        if dataset_string:
            dataset = pickle.loads(dataset_string)
            log.info('Load dataset from cache: {0}'.format(name_or_id))
            # dataset = DatasetDcatApOp(name_or_id,dataset_json)

    if not dataset or not dataset.schema:
        # Cache miss (or stale entry without a schema): load from the
        # triplestore, picking the public or private graph.
        dataset = DatasetDcatApOp(name_or_id)
        graph_name = dataset.find_the_graph_in_ts()
        loaded = False
        if graph_name not in [
                DCATAPOP_PRIVATE_GRAPH_NAME, DCATAPOP_PUBLIC_GRAPH_NAME
        ]:
            raise logic.NotFound('Package show: dataset {0} {1}'.format(
                name_or_id, _('ecodp.dcat.dataset.not_found')))
        if graph_name == DCATAPOP_PUBLIC_GRAPH_NAME:
            dataset.set_state_as_public()
            loaded = dataset.get_description_from_ts()
        elif graph_name == DCATAPOP_PRIVATE_GRAPH_NAME and (
                context.get('auth_user_obj', None)
                or context.get('ignore_auth', False) == True):
            dataset.set_state_as_private()
            # Never cache private datasets: the cache is shared and a later
            # read would bypass the authorization check above.
            active_cache = 'false'
            loaded = dataset.get_description_from_ts()
        if loaded:
            log.info('Load dataset from ts: {0}'.format(name_or_id))
        else:
            log.info('Load dataset from ts failed: {0}'.format(name_or_id))
            raise logic.NotFound('Package show: dataset {0} {1}'.format(
                name_or_id, _('ecodp.dcat.dataset.not_found')))
        if active_cache == 'true' and loaded:
            # BUGFIX: write to the same DATASET_POOL the cache read above
            # uses; without the explicit pool the entry landed in the
            # default pool and the cache could never hit.
            redis_cache.set_value_no_ttl_in_cache(
                name_or_id, pickle.dumps(dataset),
                pool=redis_cache.DATASET_POOL)
        if not dataset.schema and not loaded:
            raise logic.NotFound('Package show: dataset {0} {1}'.format(
                name_or_id, _('ecodp.dcat.dataset.not_found')))

    context['package'] = dataset
    permission = _check_access('package_show', context, data_dict)
    if not permission:
        raise logic.NotAuthorized()

    if context.get('internal'):
        # Internal callers want the domain object, not a dict.
        log.info('Package show internal took {0} sec'.format(time.time() - start))
        context['package'] = dataset
        return dataset

    output_format = data_dict.get('output_format', u'standard')
    if output_format not in [u'standard', u'rdf', u'json']:
        output_format = u'standard'
    package_dict = {}
    # 'standard' includes both representations; 'rdf'/'json' only one each.
    if not output_format == u'json':
        package_dict['rdf'] = dataset.get_dataset_as_rdfxml()
    if not output_format == u'rdf':
        package_dict['dataset'] = dataset.schema.schema_dictaze()
    if not output_format == u'rdf':
        package_dict[
            'catalog_record'] = {} if not dataset.schema_catalog_record else dataset.schema_catalog_record.schema_dictaze(
            )
    if output_format == u'standard':
        package_dict['capacity'] = dataset.privacy_state

    if context.get('for_view'):
        # UI rendering: replace the raw schema dict with the locale-aware
        # UI schema and run the usual plugin hooks.
        try:
            locale = tk.request.environ['CKAN_LANG']
        except Exception:
            locale = config.get('ckan.locale_default', 'en')
        package_dict = ui_util.transform_dcat_schema_to_ui_schema(
            dataset, locale)
        # package_dict.update(ui_dict)
        for item in plugins.PluginImplementations(plugins.IPackageController):
            log.debug('Loaded plugin: {0}'.format(item.__class__.__name__))
            package_dict = item.before_view(package_dict)
        for key, resource_dict in package_dict.get('distribution_dcat', {}).items():
            # The resource id is the last segment of the distribution URI.
            resource_dict['id'] = resource_dict['uri'].split('/')[-1]
            for item in plugins.PluginImplementations(
                    plugins.IResourceController):
                log.debug('Loaded plugin: {0}'.format(item.__class__.__name__))
                resource_dict = item.before_show(resource_dict)
    # for item in plugins.PluginImplementations(plugins.IPackageController):
    #     item.after_show(context, package_dict)
    log.info('Package show took {0} sec'.format(time.time() - start))
    return package_dict