Example #1
    def get_catalog_description(self):
        try:
            active_cache = config.get('ckan.cache.active', 'false')
            catalog = None  # type: CatalogDcatApOp

            if active_cache == 'true':
                # get the catalog from cache
                catalog_string = redis_cache.get_from_cache(
                    self.cache_id, pool=redis_cache.MISC_POOL)
                if catalog_string:
                    catalog = pickle.loads(catalog_string)
                    log.info('Load catalog from cache: {0}'.format(
                        self.cache_id))

            if active_cache != 'true' or catalog is None:
                # Fall back to the triple store and refresh the cache.
                self.get_description_from_ts()
                catalog = self
                redis_cache.flush_all_from_db(redis_cache.MISC_POOL)
                redis_cache.set_value_in_cache(self.cache_id,
                                               pickle.dumps(self),
                                               864000,
                                               pool=redis_cache.MISC_POOL)
            return catalog
        except BaseException as e:
            log.error(
                "[Catalog]. Get Catalog description failed for {0}".format(
                    self.catalog_uri))
            log.error(traceback.format_exc())
            return None
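The method above follows a cache-aside pattern: try Redis first, fall back to the triple store, then repopulate the cache with a 10-day TTL (864000 seconds). A minimal usage sketch; the constructor argument is an assumption, only get_catalog_description comes from the example:

    # Hypothetical caller; the CatalogDcatApOp constructor signature is assumed.
    catalog = CatalogDcatApOp('http://data.europa.eu/88u/catalog/sample')
    description = catalog.get_catalog_description()
    if description is None:
        # Neither the cache nor the triple store produced a usable catalog.
        log.warning('No description for {0}'.format(catalog.catalog_uri))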
Example #2
    def save_to_ts(self):
        """
            To insert or update the description of the Catalog in the TS.
            all the existing description in TS will be removed.
        :rtype: Boolean
        """
        try:
            tsch = TripleStoreCRUDHelpers()
            source_graph = self.graph_name
            target_graph_to_save = DCATAPOP_PUBLIC_GRAPH_NAME

            ttl_ds_from_ts = self.ttl_as_in_ts
            ttl_ds_last_version = self.build_the_graph().serialize(format="nt")
            r = tsch.execute_update_without_condition(source_graph,
                                                      target_graph_to_save,
                                                      ttl_ds_from_ts,
                                                      ttl_ds_last_version)
            log.info("[Catalog]. Save catalog successful [{0}]".format(
                self.catalog_uri))
            self.ttl_as_in_ts = ttl_ds_last_version
            active_cache = config.get('ckan.cache.active', 'false')
            if active_cache == 'true':
                redis_cache.set_value_in_cache(self.cache_id,
                                               pickle.dumps(self),
                                               864000,
                                               pool=redis_cache.MISC_POOL)

            return r
        except BaseException as e:
            log.error("[Catalog]. Save catalog failed [{0}]".format(
                self.catalog_uri))
            log.error(traceback.format_exc())
            return False
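A successful save also refreshes the pickled instance in Redis, so the next get_catalog_description call can be served from the cache instead of the triple store. A hedged round-trip sketch, reusing the assumed constructor from the previous example:

    catalog = CatalogDcatApOp('http://data.europa.eu/88u/catalog/sample')
    if catalog.save_to_ts():
        # The instance was re-cached under catalog.cache_id for 864000 s (10 days).
        cached = catalog.get_catalog_description()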
Example #3
    def controlled_vocabulary_group_from_db(self, property_member):
        """
        Validate the specific case in which the controlled vocabulary is in the DB.
        :param str property_member:
        :return: ValidationTypeResult|None
        """
        def get_name_from_uri(uri):
            """
            get the local name of the uri based on the template */localname
            :param str uri:
            :return str:
            """
            try:
                # Earlier versions derived the local name via
                # uri.rsplit("/", 1)[1].lower(); in this version the group
                # name itself is used as the URI.
                name_from_uri = uri.lower()

                return name_from_uri
            except BaseException as e:
                log.error(
                    "Validation. get_name_from uri failed. [uri: {0}]".format(
                        uri))
                return None

        try:
            list_uris = getattr(self.schema_to_validate, property_member,
                                None)  # type: dict[str, SchemaGeneric]
            validation_result = ValidationTypeResult.success
            if list_uris:
                for group in list_uris.values():
                    # :type group: SchemaGeneric
                    key = "controlled_vocabulary_group_from_db_" + group.uri
                    cached_validation_result = redis_cache.get_from_cache(
                        key, pool=redis_cache.MISC_POOL)
                    if cached_validation_result is None:
                        name_from_uri = get_name_from_uri(group.uri)
                        group_in_db = model.Group.get(name_from_uri)
                        if group_in_db:
                            cached_validation_result = 'True'
                        else:
                            cached_validation_result = 'False'
                        redis_cache.set_value_in_cache(
                            key,
                            cached_validation_result,
                            pool=redis_cache.MISC_POOL)
                    if cached_validation_result == 'False':
                        break
                validation_result = (ValidationTypeResult.success
                                     if cached_validation_result == 'True'
                                     else ValidationTypeResult.error)
            return validation_result
        except BaseException as e:
            log.error(
                "Validation. controlled_vocabulary_group_from_db failed. [Property {0}]. [uri: {0}] "
                .format(property_member, self.schema_to_validate.uri))
            log.error(traceback.format_exc())
            return None
Example #4
    def get_vocabulary_description(self):
        try:
            active_cache = config.get('ckan.cache.active', 'false')
            vocabulary = None  # type: ConceptSchemaSkosWrapper

            if active_cache == 'true':
                # get the vocabulary from cache
                vocabulary_string = redis_cache.get_from_cache(self.cache_id)
                if vocabulary_string:
                    vocabulary = pickle.loads(vocabulary_string)
                    log.info('Load ConceptSchemaSkosWrapper from cache: {0}'.format(self.cache_id))

            if active_cache != 'true' or vocabulary is None:
                # Fall back to the triple store and refresh the cache.
                self.get_description_from_ts()
                vocabulary = self
                redis_cache.set_value_in_cache(self.cache_id, pickle.dumps(self), 864000)
            return vocabulary
        except BaseException as e:
            log.error("[Vocabulary]. Get ConceptSchemaSkosWrapper description failed for {0}".format(self.uri))
            log.error(traceback.format_exc())
            return None
    def __init__(self):

        start = time.time()

        try:
            active_cache = config.get('ckan.cache.active', 'true')
            cv = None
            if active_cache == 'true':
                # get the ControlledVocabulary mapping from cache
                controlled_voc_string = redis_cache.get_from_cache("ControlledVocabulary_Mapping", pool=redis_cache.VOCABULARY_POOL)
                if controlled_voc_string:
                    cv = pickle.loads(controlled_voc_string)
                    log.info('Load controlled vocabulary mapping from cache')
                    self.__dict__.update(cv.__dict__)
            if active_cache != 'true' or cv is None:
                self.controlled_file_types = retrieve_all_file_types()
                self.controlled_file_types_with_context = retrieve_all_file_types_with_context()
                self.controlled_frequencies = retrieve_all_frequencies()
                self.controlled_status = retrieve_all_datasets_status()
                self.controlled_languages = retrieve_all_languages()
                self.controlled_distribution_types = retrieve_all_distribution_types()
                self.controlled_documentation_types = retrieve_all_documentation_types()
                self.controlled_publishers = retrieve_all_publishers()
                self.controlled_country = retrieve_all_country_types()
                self.controlled_time_period = retrieve_all_time_periods()
                self.controlled_notation_types = retrieve_all_notation_types()
                self.controlled_license = retrieve_all_license()
                self.controlled_eurovoc_concepts = retrieve_all_aurovoc_concept()
                self.controlled_data_themes = retrieve_all_data_themes()
                self.controlled_access_rights = retrieve_all_access_rights()
                self.controlled_adms = retrieve_all_adms()
                self.controlled_datasets_types = retrieve_all_dataset_types()

                redis_cache.set_value_in_cache("ControlledVocabulary_Mapping", pickle.dumps(self), 864000, pool=redis_cache.VOCABULARY_POOL)
        except BaseException as e:
            log.error("[ControlledVocabulary]. Build ControlledVocabulary mapping failed")
            log.error(traceback.format_exc())
        duration = time.time() - start
        log.info("[Duration] get Controlled vocabulary mapping took {0}".format(duration))
Example #6
    def publisher_export(self):

        publisher = request.params.get('publisher', '')

        locale = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        cache_key = 'global_export:{0}:{1}'.format(locale, publisher)
        dict_string = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)
        result = None
        if dict_string:
            start_time = time.time()
            result = pickle.loads(dict_string)
            duration = time.time() - start_time
            log.info("Loading json took {0}".format(duration))
        else:
            # Bind the publisher name as a query parameter instead of
            # interpolating it, which avoids SQL injection via the request.
            query_string = '''select d.id, d.name, d.title, p.name, p.title, r.id, r.name, r.resource_type, r.url, r.format, r.mimetype, r.last_modified, r.size, tem.score, tem.reason
                        from package d join resource_group rg on d.id = rg.package_id join resource r on rg.id = r.resource_group_id join "group" p on d.owner_org = p.id
                        join (select sr.entity_id, sr.value as score, ds.value as reason from task_status sr join task_status ds on sr.entity_id = ds.entity_id where sr.key = 'openness_score' and ds.key = 'openness_score_reason') tem on tem.entity_id = r.id
                        where p.name = :publisher
                        order by p.name, d.name'''
            rows = model.Session.execute(query_string, {'publisher': publisher})
            try:
                data_list = [{'dataset_id': (row[0] or '').encode('utf8'),
                              'dataset_name': (row[1] or '').encode('utf8'),
                              'dataset_title': (row[2] or '').encode('utf8'),
                              'publisher_name': (row[3] or '').encode('utf8'),
                              'publisher_title': (row[4] or '').encode('utf8'),
                              'resource_id': (row[5] or '').encode('utf8'),
                              'resource_name': (row[6] or '').encode('utf8'),
                              'resource_resource_type': (row[7] or '').encode('utf8'),
                              'resource_url': (row[8] or '').encode('utf8'),
                              'resource_format': (row[9] or '').encode('utf8'),
                              'resource_mimetype': (row[10] or '').encode('utf8'),
                              'resource_last_modified': str(row[11]) if row[11] else '',
                              'resource_size': row[12] or '',
                              'openness_score': row[13] or '',
                              'openness_reason': row[14] or ''} for row in rows]
                cache.set_value_in_cache(cache_key,
                                         pickle.dumps(data_list),
                                         pool=cache.MISC_POOL)
                result = data_list
            except Exception as e:
                log.error('publisher_export failed: {0}'.format(e))
        return result
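Because exports are cached per locale and per publisher under the global_export:{locale}:{publisher} key format, a warm-up or monitoring job can probe the same keys. A small sketch using only the cache calls visible above; the publisher names are illustrative:

    for publisher in ('estat', 'comp', 'grow'):  # illustrative names
        key = 'global_export:{0}:{1}'.format('en', publisher)
        if not cache.get_from_cache(key, pool=cache.MISC_POOL):
            log.info('Export cache cold for {0}'.format(key))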
Example #7
class ECODPGroupController(GroupController):


    def index(self):
        start = time.time()
        group_type = self._guess_group_type()
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'for_view': True,
                   'with_private': False}

        q = c.q = request.params.get('q', '')
        data_dict = {'all_fields': True, 'q': q}
        sort_by = c.sort_by_selected = request.params.get('sort')
        if sort_by:
            data_dict['sort'] = sort_by
        try:
            self._check_access('site_read', context)
        except NotAuthorized:
            abort(401, _('Not authorized to see this page'))

        # pass user info to context as needed to view private datasets of orgs correctly
        if c.userobj:
            context['user_id'] = c.userobj.id
            context['user_is_admin'] = c.userobj.sysadmin

        results = self._action('group_list')(context, data_dict)

        c.amount_group_displayed = amount_group_displayed

        c.groups = results[:amount_group_displayed]
        c.hasmoregroups = len(results) > amount_group_displayed
        c.themes = self._action('theme_list')(context, {})
        c.catalogs = CatalogDcatApOp.get_ui_list_catalogs(config.get('ckan.locale_default', 'en'))
        c.amount_catalog_displayed = amount_catalog_displayed

        #c.page = h.Page(
        #    collection=results,
        #    page=request.params.get('page', 1),
        #    url=h.pager_url,
        #    items_per_page=21
        #)
        # @cache.cache('cached_render', expire=3600)
        def cached_render(user, language, group_type):
            _render = base.render(group_type)
            return _render
        start_render = time.time()
        _render = cached_render(context.get('user'), language, self._index_template(group_type))

        duration_render = time.time() - start_render
        log.info("Duration index  render. {0}".format(duration_render))
        duration = time.time() - start
        log.info("Duration index. {0}".format(duration))
        return _render

    def read(self, id, limit=20):

        start = time.time()
        if request.GET.get('ext_boolean') in ['all', 'any', 'exact']:
            base.session['ext_boolean'] = request.GET['ext_boolean']
            base.session.save()
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')

        group_type = self._get_group_type(id.split('@')[0])
        if (group_type != self.group_type) and (group_type != "eurovoc_domain"):
            abort(404, _('Incorrect group type'))

        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True}
        # Do not query for the group datasets when dictizing, as they will
        # be ignored and get requested on the controller anyway
        data_dict = {'id': id, 'include_datasets': False}

        # unicode format (decoded from utf8)
        q = c.q = request.params.get('q', '')

        try:
            start_gs = time.time()
            if 'eurovoc_domain' in data_dict.get('id', ''):
                raise NotFound('EurovocDomains are not available any more')
            result = self._action('group_show_read')(context, data_dict)
            c.group_dict = result.get('group_dict', None)
            context["group"] = result.get('group')
            duration_gs = time.time() - start_gs
            log.info("Duration group show read. {0}".format(duration_gs))
            c.group = context['group']
        except NotFound:
            abort(404, _('Group not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read group %s') % id)

        self._read(id, limit)

        start_render = time.time()

        # @cache.cache('render_cached_read', expire=3600)
        def render_cached(user, id, language, group_type):
            _render = base.render(self._read_template(c.group_dict['type']))
            return _render

        _render = render_cached(context.get('user'), id, language, c.group_dict['type'])
        duration_render = time.time() - start_render
        log.info("Duration read render. {0}".format(duration_render))

        duration = time.time() - start
        log.info("Duration read_group. {0}".format(duration))
        return _render

    def _read(self, id, limit=20):
        '''This is common code used by both read and bulk_process.'''
        import time
        language = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')
        start = time.time()
        group_type = self._get_group_type(id.split('@')[0])
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'schema': self._db_to_form_schema(group_type=group_type),
                   'for_view': True, 'extras_as_string': True}

        q = c.q = request.params.get('q', '')
        # Search within group

        if q != u'':
            qTab = q.split(' ')
            checkbox = request.params.get('ext_boolean')
            if checkbox == 'all':
                q = '(' + ' AND '.join(qTab) + ')'
            elif checkbox == 'any':
                q = '(' + ' OR '.join(qTab) + ')'
            else:  # checkbox == 'exact'
                q = '"' + q + '"'

        c.description_formatted = h.render_markdown(c.group_dict.get('description'))

        context['return_query'] = True

        # c.group_admins is used by CKAN's legacy (Genshi) templates only,
        # if we drop support for those then we can delete this line.
        c.group_admins = new_authz.get_group_or_org_admin_ids(c.group.id)

        try:
            page = int(request.params.get('page', 1))
        except ValueError:
            abort(400, '"page" parameter must be an integer')

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        new_params_nopage = []
        for key, value in params_nopage:
            if key == 'eurovoc_domains':
                new_params_nopage.append(('groups', value))
            else:
                new_params_nopage.append((key, value))

        params_nopage = new_params_nopage

        #sort_by = request.params.get('sort', 'name asc')
        sort_by = request.params.get('sort', None)

        @cache.cache('search_url', expire=3600)
        def search_url(params):
            if group_type == 'organization':
                if c.action == 'bulk_process':
                    url = self._url_for(controller='organization',
                                        action='bulk_process',
                                        id=id)
                else:
                    url = self._url_for(controller='organization',
                                        action='read',
                                        id=id)
            else:
                url = self._url_for(controller='group', action='read', id=id)
            params = [(k, v.encode('utf-8') if isinstance(v, basestring)
                       else str(v)) for k, v in params]
            return url + u'?' + urlencode(params)

        @cache.cache('drill_down_url', expire=3600)
        def drill_down_url(**by):
            return h.add_url_param(alternative_url=None,
                                   controller='group', action='read',
                                   extras=dict(id=c.group_dict.get('name')),
                                   new_params=by)

        c.drill_down_url = drill_down_url

        def remove_field(key, value=None, replace=None):
            if c.group_dict.get('is_organization'):
                return h.remove_url_param(key, value=value, replace=replace,
                                      controller='organization', action='read',
                                      extras=dict(id=c.group_dict.get('id')))
            else:
                return h.remove_url_param(key, value=value, replace=replace,
                                      controller='group', action='read',
                                      extras=dict(id=c.group_dict.get('name')))

        c.remove_field = remove_field

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params)

        try:
            c.fields = []
            search_extras = {}
            for (param, value) in request.params.items():
                if param not in ['q', 'page', 'sort'] \
                        and len(value) and not param.startswith('_'):
                    if not param.startswith('ext_'):
                        c.fields.append((param, value))
                        q += ' %s:"%s"' % (param, value)
                    else:
                        search_extras[param] = value

            fq = ''
            if c.group_dict.get('is_organization'):
                q += ' owner_org:"%s"' % c.group_dict.get('id')
            elif c.group_dict.get('name') not in q:
                q += ' groups:"%s"' % c.group_dict.get('name')

            fq = fq + ' capacity:"public"'
            user_member_of_orgs = [org['id'] for org
                                   in h.organizations_available('read')]
            if (c.group and c.group.id in user_member_of_orgs):
                fq = ''
                context['ignore_capacity_check'] = True

            facets = OrderedDict()

            default_facet_titles = {'organization': _('Organizations'),
                                    'groups': _('Groups'),
                                    'tags': _('Tags'),
                                    'res_format': _('Formats'),
                                    'license_id': _('Licenses')}

            for facet in g.facets:
                if facet in default_facet_titles:
                    facets[facet] = default_facet_titles[facet]
                else:
                    facets[facet] = facet

            # Facet titles
            for plugin in plugins.PluginImplementations(plugins.IFacets):
                if self.group_type == 'organization':
                    facets = plugin.organization_facets(
                        facets, self.group_type, None)
                else:
                    facets = plugin.group_facets(
                        facets, self.group_type, None)

            if 'capacity' in facets and (self.group_type != 'organization' or
                                         not user_member_of_orgs):
                del facets['capacity']

            c.facet_titles = facets

            data_dict = {
                'q': q,
                'fq': fq,
                'facet.field': facets.keys(),
                'rows': limit,
                'sort': sort_by,
                'start': (page - 1) * limit,
                'extras': search_extras,
                'defType': 'edismax'
            }

            active_cache = config.get('ckan.cache.active', 'false')

            context_ = dict((k, v) for (k, v) in context.items() if k != 'schema')

            has_result = False
            dict_as_pickle = None

            if active_cache == 'true':
                dict_as_pickle = pickle.dumps(data_dict)
                query_json = redis.get_from_cache(dict_as_pickle, pool=redis.MISC_POOL)
                if query_json:
                    query = pickle.loads(query_json)
                    has_result = True

            if not has_result:
                query = get_action('package_search')(context_, data_dict)
                if active_cache == 'true':
                    redis.set_value_in_cache(dict_as_pickle, pickle.dumps(query), pool=redis.MISC_POOL)


            c.search_url_params = urlencode(_encode_params(params_nopage))
            c.page = page_util.Page(
                collection=query['results'],
                page=page,
                url=pager_url,
                item_count=query['count'],
                items_per_page=limit
            )

            c.group_dict['package_count'] = query['count']
            c.facets = query['facets']
            #maintain.deprecate_context_item('facets', 'Use `c.search_facets` instead.')

            c.search_facets = query['search_facets']
            c.search_facets_limits = {}
            for facet in c.facets.keys():
                limit = int(request.params.get('_%s_limit' % facet,
                                               g.facets_default_number))
                c.search_facets_limits[facet] = limit
            c.page.items = query['results']

            c.sort_by_selected = sort_by
            duration = time.time() - start
            log.info("Duration _read. {0}".format(duration))


        except search.SearchError as se:
            log.error('Group search error: %r', se.args)
            c.query_error = True
            c.facets = {}
            c.page = page_util.Page(collection=[])
            c.search_url_params = ''
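One caveat in _read: the Solr query cache is keyed on pickle.dumps(data_dict), which only matches when the dict pickles to an identical byte string across requests. A hedged alternative that derives an order-independent key; the helper name is invented for illustration:

    import hashlib
    import json

    def stable_cache_key(data_dict):
        # json.dumps with sort_keys=True yields the same string for equal dicts
        # regardless of insertion order; hashing keeps the Redis key short.
        canonical = json.dumps(data_dict, sort_keys=True, default=str)
        return 'package_search:' + hashlib.sha1(canonical.encode('utf-8')).hexdigest()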
Example #8
def _package_search(data_dict):
    """
    Helper method that wraps the package_search action.

     * unless overridden, sorts results by modified_date
     * unless overridden, sets a default item limit
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }

    if 'sort' not in data_dict or not data_dict['sort'] or data_dict[
            'sort'] == 'modified_date desc':
        data_dict['sort'] = 'modified_date desc'

        try:
            page = int(request.params.get('page', 1))
        except ValueError:
            abort(400, '"page" parameter must be an integer')

        data_dict['group'] = 'true'
        data_dict['group.query'] = [
            '-organization:estat AND -organization:comp AND -organization:grow',
            'organization:estat', 'organization:comp', 'organization:grow'
        ]
        data_dict['group.format'] = 'simple'
        data_dict['rows'] = 2147483646  # effectively unlimited; grouping needs the full result set

        start = (page - 1) * ITEMS_LIMIT

        result_cache_key = '{0}:{1}'.format(json.dumps(data_dict), start)
        count_cache_key = '{0}:{1}:{2}'.format(json.dumps(data_dict), start,
                                               'count')

        result_list_string = cache.get_from_cache(result_cache_key,
                                                  pool=cache.MISC_POOL)
        count_cache = cache.get_from_cache(count_cache_key,
                                           pool=cache.MISC_POOL)

        if result_list_string and count_cache:

            return int(count_cache), pickle.loads(result_list_string)

        else:
            try:
                query = get_action('custom_package_search')(context,
                                                            data_dict.copy())
            except SearchError as se:
                log.warning(se)
                import traceback
                log.error(traceback.format_exc())
                abort(400, ('Search query could not be parsed'))

            cached_result = []

            for name, group in query['groups'].iteritems():
                cached_result += group['doclist']['docs']

            #result_list = customsearch.check_solr_result(context, cached_result[start:], ITEMS_LIMIT)
            result_list = []
            for item in cached_result[start:start + ITEMS_LIMIT]:
                try:
                    get_action('package_show')(context, {'id': item.get('id')})
                    result_list.append(context.get('package'))
                except NotFound as e:
                    log.warning('Package show: {0} Dataset not found'.format(
                        item.get('id')))

            cache.set_value_in_cache(result_cache_key,
                                     pickle.dumps(result_list),
                                     pool=cache.MISC_POOL)
            cache.set_value_in_cache(count_cache_key,
                                     query['count'],
                                     pool=cache.MISC_POOL)

        return query['count'], result_list
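A short sketch of how the helper's (count, results) pair might be consumed; the query dict is illustrative:

    count, results = _package_search({'q': 'climate', 'sort': 'modified_date desc'})
    log.info('{0} datasets match; first page has {1}'.format(count, len(results)))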
Example #9
                                              pool=cache.MISC_POOL)
    count_cache = cache.get_from_cache(count_cache_key, pool=cache.MISC_POOL)

    if result_list_string and count_cache:
        return int(count_cache), pickle.loads(result_list_string)

    if 'rows' not in data_dict or not data_dict['rows']:
        data_dict['rows'] = ITEMS_LIMIT

    # package_search action modifies the data_dict, so keep our copy intact.
    try:
        context['internal'] = True
        query = __common_search(context, data_dict.copy())

        cache.set_value_in_cache(result_cache_key,
                                 pickle.dumps(query['results']),
                                 pool=cache.MISC_POOL)
        cache.set_value_in_cache(count_cache_key,
                                 query['count'],
                                 pool=cache.MISC_POOL)
    except (search.SearchError, ValueError) as se:
        log.error('Search error: %r', se.args)
        query = {'count': 0, 'results': []}

    return query['count'], query['results']


def __common_search(context, data_dict):
Example #10
    def get_resources_of_datasets(self,
                                  publisher='none',
                                  graph_name='<dcatapop-public>'):
        '''
        :param str publisher: lowercase publisher name, or 'none' for all publishers
        :param str graph_name:
        :return: list of dicts when a publisher is given, a dict keyed by the
                 resource's local name otherwise; None on failure
        '''
        result = []
        try:
            str_res = redis_cache.get_from_cache(
                'get_resources_of_datasets:{0}'.format(publisher),
                pool=redis_cache.MISC_POOL)
            if str_res:
                result = pickle.loads(str_res)
                return result
        except Exception as e:
            import traceback
            log.error('{0}'.format(e))
            log.error(traceback.format_exc())

        try:
            select_query = " select ?resource ?dataset_name ?dataset_title ?publisher from {0} where {{ " \
                           " {{ " \
                           " ?ds a  <http://www.w3.org/ns/dcat#Dataset> ." \
                           " ?ds <http://www.w3.org/ns/dcat#distribution> ?resource ." \
                           " ?ds <http://purl.org/dc/terms/title> ?dataset_title ." \
                           " ?ds <http://purl.org/dc/terms/publisher> ?publisher ." \
                           " ?ds <http://data.europa.eu/88u/ontology/dcatapop#ckanName> ?dataset_name" \
                           " filter (lang(?dataset_title) in ('en',''))" \
                           " }}" \
                           " union " \
                           " {{" \
                           "  ?ds a  <http://www.w3.org/ns/dcat#Dataset> ." \
                           " ?ds foaf:page ?resource ." \
                           " ?ds <http://purl.org/dc/terms/title> ?dataset_title ." \
                           " ?ds <http://data.europa.eu/88u/ontology/dcatapop#ckanName> ?dataset_name ." \
                           " ?ds <http://purl.org/dc/terms/publisher> ?publisher ." \
                           " filter (lang(?dataset_title) in ('en',''))" \
                           " }} }}".format(graph_name)

            result = self.execute_select_query_auth(select_query)
            list_final = None
            if publisher != 'none':
                list_final = []
                for res in result:
                    if res.get('publisher').get('value').split(
                            '/')[-1].lower() == publisher:
                        list_final.append({
                            'dataset_name':
                            res['dataset_name']['value'],
                            'dataset_title':
                            res['dataset_title']['value'],
                            'resource':
                            res['resource']['value'],
                            'publisher':
                            res['publisher']['value']
                        })
            else:
                list_final = {}
                for res in result:
                    list_final[res['resource']['value'].split('/')[-1]] = {
                        'dataset_name': res['dataset_name']['value'],
                        'dataset_title': res['dataset_title']['value'],
                        'resource': res['resource']['value'],
                        'publisher': res['publisher']['value']
                    }
            redis_cache.set_value_in_cache(
                'get_resources_of_datasets:{0}'.format(publisher),
                pickle.dumps(list_final),
                86400,
                pool=redis_cache.MISC_POOL)
            return list_final
        except BaseException as e:
            import traceback
            log.error('{0}'.format(e))
            log.error(traceback.format_exc())
            return None
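Note the asymmetric return type: a list of row dicts when a publisher is requested, and a dict keyed by the resource URI's local name otherwise (None on failure). A hedged usage sketch; the owning class is assumed to be the triple-store helper seen elsewhere in these examples:

    helper = TripleStoreQueryHelpers()  # assumed host class of the method above
    all_resources = helper.get_resources_of_datasets()           # dict keyed by resource id
    estat_resources = helper.get_resources_of_datasets('estat')  # list of dicts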
Example #11
def get_skos_hierarchy(context, max_element=None):
    """
    Build the publisher hierarchy with per-publisher package counts.

    :param context: CKAN action context; the 'user' entry scopes counts to readable packages
    :param max_element: optional cap on the number of elements (unused in this snippet)
    :return: OrderedDict mapping top-level publisher URIs to {'children': [...], 'total': n}
    """
    result = OrderedDict()

    ts_query_helper = TripleStoreQueryHelpers()
    user = context.get('user', None)
    if user:
        cache_key = 'skos_hierarchy_{0}'.format(user)
    else:
        cache_key = 'skos_hierarchy'
    dict_string = cache.get_from_cache(cache_key, pool=cache.MISC_POOL)
    if dict_string:
        start_time = time.time()
        result = json.loads(dict_string)
        duration = time.time() - start_time
        log.info("[DB] Loading json took {0}".format(duration))
    else:
        try:
            graph_list = []
            graph_list.append('dcatapop-public')
            start1 = time.time()
            package_count_public = ts_query_helper.get_package_count_by_publisher(graph_list)
            log.info('1st package count query took {0}s'.format(time.time() - start1))

            graph_list.append('dcatapop-private')
            start2 = time.time()
            package_count_all = ts_query_helper.get_package_count_by_publisher(graph_list)
            log.info('2nd package count query took {0}s'.format(time.time() - start2))

            factory = ControlledVocabularyFactory()
            publ_mdr = factory.get_controlled_vocabulary_util(ControlledVocabularyFactory.CORPORATE_BODY)  # type: CorporateBodiesUtil
            publ_hierarchy = publ_mdr.get_publisher_hierarchy()

            for top_level, children in publ_hierarchy.items():
                sum_count = 0
                pub_id = top_level.split('/')[-1].lower()
                if auth.has_user_permission_for_group_or_org(pub_id, user, 'read'):
                    sum_count += package_count_all.get(top_level) or 0
                else:
                    sum_count += package_count_public.get(top_level) or 0
                interim = {'children': []}
                for child in children:
                    count = 0
                    pub_id = child.split('/')[-1].lower()
                    if auth.has_user_permission_for_group_or_org(pub_id, user, 'read'):
                        count += package_count_all.get(child) or 0
                    else:
                        count += package_count_public.get(child) or 0
                    if count > 0:
                        interim['children'].append((child,count))
                        sum_count += count
                interim['total'] = sum_count
                result[top_level] = interim

            cache.set_value_in_cache(cache_key, json.dumps(result), pool=cache.MISC_POOL)
        except Exception as e:
            log.error('Error during querying the groups in get_skos_hierarchy: %s' % e)
            import traceback
            log.error(traceback.format_exc())
            return {}
    return result
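For reference, the returned mapping pairs each top-level publisher URI with its visible children and an aggregate count; a hedged illustration with invented URIs and counts:

    # Illustrative shape only; URIs and counts are invented.
    sample = {
        'http://publications.europa.eu/resource/authority/corporate-body/ESTAT': {
            'children': [('http://publications.europa.eu/resource/authority/corporate-body/ESTAT_X', 12)],
            'total': 250,  # public or all packages, depending on the user's read permission
        },
    }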