コード例 #1
0
def _dge_harvest_list_dataset_field_labels(name_field=None, value_field=None):
    '''
    Return the values the given dataset field may take, mapped to their
    labels and related metadata from the scheming schema.

    :param name_field: field_name of a dataset field in the scheming schema
    :param value_field: if given, return only the entry for this choice value
    :returns: dict mapping choice value -> {'label', 'description',
        'dcat_ap', 'notation'}; empty dict when the field or its choices
        are missing
    '''
    def _option_info(option):
        # Common projection of a scheming choice into a result entry.
        return {
            'label': option.get('label'),
            'description': option.get('description'),
            'dcat_ap': option.get('dcat_ap'),
            'notation': option.get('notation')
        }

    result = {}
    if name_field is not None:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        values = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                           name_field) or {}
        # Use .get() so a field defined without 'choices' (or an option
        # without 'value') does not raise KeyError, as the original did.
        for option in (values.get('choices') or []):
            if option and option.get('value'):
                if value_field:
                    if option['value'] == value_field:
                        return {option['value']: _option_info(option)}
                else:
                    result[option['value']] = _option_info(option)
    return result
コード例 #2
0
ファイル: helpers.py プロジェクト: ckan/ckanext-fluent
def fluent_form_languages(field=None, entity_type=None, object_type=None,
        schema=None):
    """
    Resolve the language codes to offer on a form (or a single form field).

    Resolution order: the field's own ``form_languages``, then the given
    schema's, then the schema looked up from entity_type + object_type,
    and finally the languages available in the site configuration.
    """
    for source in (field, schema):
        if source and 'form_languages' in source:
            return source['form_languages']
    if entity_type and object_type:
        # late import for compatibility with older ckanext-scheming
        from ckanext.scheming.helpers import scheming_get_schema
        looked_up = scheming_get_schema(entity_type, object_type)
        if looked_up and 'form_languages' in looked_up:
            return looked_up['form_languages']

    # Fall back to the site's locales, de-duplicated in encounter order.
    seen = []
    for locale in get_available_locales():
        if locale.language not in seen:
            seen.append(locale.language)
    return seen
コード例 #3
0
ファイル: helpers.py プロジェクト: vrk-kpa/ckanext-fluent
def fluent_form_languages(field=None,
                          entity_type=None,
                          object_type=None,
                          schema=None):
    """
    Return the language codes a form (or one form field) should display.

    Checks, in order: ``field['form_languages']``,
    ``schema['form_languages']``, the schema fetched for
    entity_type/object_type, and lastly the site's available locales.
    """
    if field and 'form_languages' in field:
        return field['form_languages']
    if schema and 'form_languages' in schema:
        return schema['form_languages']
    if entity_type and object_type:
        # Imported here so older ckanext-scheming versions still work.
        from ckanext.scheming.helpers import scheming_get_schema
        fetched = scheming_get_schema(entity_type, object_type)
        if fetched and 'form_languages' in fetched:
            return fetched['form_languages']

    codes = []
    for loc in get_available_locales():
        code = loc.language
        if code not in codes:
            codes.append(code)
    return codes
コード例 #4
0
    def before_index(self, data_dict):
        """
        Enrich the search-index document before it is sent to SOLR.

        Adds ``res_format_label`` (translated label for each resource
        format), ``frequency_id`` / ``frequency_label`` (derived from the
        JSON-encoded 'frequency' extra) and splits the serialized 'theme'
        value into ``theme``, ``theme_id``, ``theme_es`` and ``theme_gl``
        lists.

        :param data_dict: the package dict about to be indexed (mutated
            in place and returned)
        """
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        if ('res_format' in data_dict):
            # Get the 'format' resource field from the scheming schema.
            formats = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                                'format')

            # Create the extra SOLR field with one label per format value.
            data_dict['res_format_label'] = []
            for res_format in data_dict['res_format']:
                # Translate the raw format value to its choice label.
                res_format_label = sh.scheming_choices_label(
                    formats['choices'], res_format)
                if res_format_label:
                    # Add label to new SOLR field
                    data_dict['res_format_label'].append(res_format_label)

        if ('frequency' in data_dict):
            # 'frequency' arrives as a JSON string: {"type": ..., "value": ...}
            frequency = data_dict['frequency']
            if frequency:
                freq = json.loads(frequency)
                ftype = freq['type']
                fvalue = freq['value']
                # frequency_id is '<value>-<type>', e.g. '1-months'.
                data_dict['frequency_id'] = '{value}-{type}'.format(
                    type=ftype, value=fvalue)
                data_dict[
                    'frequency_label'] = helpers.csc_dataset_display_frequency(
                        fvalue, ftype)
                #log.info('Frecuency = {f1}, frequency_id={f2}, frequency_label={f3}'.format(f1=frequency, f2=data_dict['frequency_id'], f3=data_dict['frequency_label']))

        if ('theme' in data_dict):
            # Get the 'theme' dataset field (with its translated choices).
            categoria = sh.scheming_field_by_name(
                dataset.get('dataset_fields'), 'theme')

            # Raw serialized theme value, e.g. '["themeA", "themeB"]'.
            valor_categoria = data_dict['theme']

            # Reset the theme-related index fields before refilling them.
            data_dict['theme'] = []
            data_dict['theme_id'] = []
            data_dict['theme_es'] = []
            data_dict['theme_gl'] = []

            # NOTE(review): hand-rolled parsing of the serialized list —
            # assumes the exact '["a", "b"]' formatting; confirm values
            # never contain brackets or the '", "' separator.
            valores = valor_categoria.replace('[', '').replace(']', '')
            categorias = valores.split('", "')
            # Get the translated label for each theme key.
            for term_categoria in list(categorias):
                clean_term = term_categoria.replace('"', '')
                data_dict['theme'].append(clean_term)
                data_dict['theme_id'].append(helpers.csc_theme_id(clean_term))
                # Look the term up among the schema's choices.
                for option in categoria.get('choices'):
                    if option['value'] == clean_term:
                        # NOTE(review): assumes every choice label has both
                        # 'es' and 'gl' translations — confirm in schema.
                        data_dict['theme_es'].append(option['label']['es'])
                        data_dict['theme_gl'].append(option['label']['gl'])
        return data_dict
コード例 #5
0
ファイル: helpers.py プロジェクト: abgov/ckanext-ab_scheming
def get_required_fields_name(dataset_type):
    """Return the field_name of each required dataset field in the schema."""
    schema = h.scheming_get_schema('dataset', dataset_type)
    return [field.get('field_name')
            for field in schema['dataset_fields']
            if field.get('required')]
コード例 #6
0
def get_required_fields_name(dataset_type):
    """List the names of all required fields for the given dataset type."""
    dataset_scheme = h.scheming_get_schema('dataset', dataset_type)
    names = []
    for field in dataset_scheme['dataset_fields']:
        if not field.get('required'):
            continue
        names.append(field.get('field_name'))
    return names
コード例 #7
0
def has_published_date_field_in_schema(dataset_type):
    """Return True when the dataset schema defines a published_date field."""
    if not dataset_type:
        return False
    schema = h.scheming_get_schema('dataset', dataset_type)
    field = h.scheming_field_by_name(schema['dataset_fields'],
                                     "published_date")
    return bool(field)
コード例 #8
0
def resource_required(dataset_type):
    """Return True when any dataset field's validators mention
    ab_ps_resource_required."""
    if not dataset_type:
        return False
    schema = h.scheming_get_schema('dataset', dataset_type)
    return any(
        field.get('validators') and re.search("ab_ps_resource_required",
                                              field.get('validators'))
        for field in schema['dataset_fields'])
コード例 #9
0
def get_required_fields_name_label_dict(dataset_type):
    """Map field_name -> label for every required dataset field."""
    if not dataset_type:
        return {}
    schema = h.scheming_get_schema('dataset', dataset_type)
    return {field.get('field_name'): field.get('label')
            for field in schema['dataset_fields']
            if field.get('required')}
コード例 #10
0
def _get_classfication_field(dataset_type):
    """Return the 'classification' resource field from the schema, or None."""
    scheme = h.scheming_get_schema('dataset', dataset_type)
    if not scheme:
        return None
    resource_fields = scheme.get('resource_fields')
    if not resource_fields:
        return None
    # scheming_field_by_name already yields the field dict or a falsy
    # value; normalise the falsy case to None.
    return h.scheming_field_by_name(resource_fields, "classification") or None
コード例 #11
0
def _csc_dcat_list_resource_field_values(name_field=None):
    '''
    Return the list of choice values the given resource field may take.

    :param name_field: field_name of a resource field in the scheming schema
    :returns: list of choice values; empty when the field or its choices
        are missing
    '''
    result = []
    if name_field is not None:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        values = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                           name_field) or {}
        # Use .get() so a field defined without 'choices' (or an option
        # without 'value') does not raise KeyError, as the original did.
        for option in (values.get('choices') or []):
            if option and option.get('value'):
                result.append(option['value'])
    return result
コード例 #12
0
def dge_list_themes(themes=None):
    '''
    Given a theme value list, get their translated labels.

    :param themes: theme values
    :type themes: list of strings

    :rtype: list of (theme_id, label) tuples
    '''
    dataset = sh.scheming_get_schema('dataset', 'dataset')
    formats = sh.scheming_field_by_name(dataset.get('dataset_fields'), 'theme')
    label_list = []
    # Guard against the default None: the original raised TypeError when
    # called without arguments.
    for theme in themes or []:
        label = sh.scheming_choices_label(formats['choices'], theme)
        if label:
            label_list.append((dge_theme_id(theme), label))
    return label_list
コード例 #13
0
def dge_resource_format_label(res_format=None):
    '''
    Given a format value, return its translated label.

    :param res_format: resource format value
    :type res_format: string

    :rtype: string (the label, or the original value when no label exists)
    '''
    # Bug fix: the original tested the builtin ``format`` (always truthy)
    # instead of the ``res_format`` argument, so a None/empty value still
    # triggered a pointless schema lookup.
    if res_format:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        formats = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                            'format')
        res_format_label = sh.scheming_choices_label(formats['choices'],
                                                     res_format)
        if res_format_label:
            return res_format_label
    return res_format
コード例 #14
0
    def after_search(self, search_results, search_params):
        """
        Localise facet display names after a package search.

        Skips processing for non-frontend requests (``is_frontend``).
        Translates 'theme_id' facet items using the scheming schema's
        theme choice labels (request language, falling back to
        ``ckan.locale_default``) and 'administration_level' items via a
        helper lookup.

        :param search_results: search result dict (mutated and returned)
        :param search_params: unused here
        """
        if not is_frontend():
            return search_results

        # Translate the unselected search facets.
        facets = search_results.get('search_facets')
        if not facets:
            return search_results

        # Request language, with the configured site default as fallback.
        desired_lang_code = pylons.request.environ['CKAN_LANG']
        fallback_lang_code = pylons.config.get('ckan.locale_default', 'es')

        # Build theme_id -> translated label from the schema's choices.
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        categoria = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                              'theme')
        dict_categoria = {}
        for option in categoria.get('choices'):
            label_option = (option.get('label')).get(desired_lang_code, None)
            if not label_option:
                label_option = (option.get('label')).get(
                    fallback_lang_code, None)
            dict_categoria[helpers.dge_theme_id(
                option.get('value'))] = label_option
        facet = facets.get('theme_id', None)
        if facet:
            for item in facet.get('items', None):
                item['display_name'] = dict_categoria.get(
                    item.get('name'), item.get('display_name'))
                # Also store the raw facet name under 'class'.
                item['class'] = item.get('name')

        facet = facets.get('administration_level', None)
        if facet:
            for item in facet.get('items', None):
                item[
                    'display_name'] = helpers.dge_get_translated_administration_level(
                        item.get('name'))
        return search_results
コード例 #15
0
    def after_search(self, search_results, search_params):
        """
        Localise facet display names after a package search.

        Translates 'theme_id' facet items using the scheming schema's
        theme choice labels (request language, falling back to
        ``ckan.locale_default``) and renders 'frequency_id' facet names
        through ``helpers.csc_dataset_display_frequency``.

        :param search_results: search result dict (mutated and returned)
        :param search_params: unused here
        """

        # Translate the unselected search facets.
        facets = search_results.get('search_facets')
        if not facets:
            return search_results

        # Request language, with the configured site default as fallback.
        desired_lang_code = request.environ.get('CKAN_LANG')
        fallback_lang_code = config.get('ckan.locale_default', 'es')

        # Build theme_id -> translated label from the schema's choices.
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        categoria = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                              'theme')
        dict_categoria = {}
        for option in categoria.get('choices'):
            label_option = (option.get('label')).get(desired_lang_code, None)
            if not label_option:
                label_option = (option.get('label')).get(
                    fallback_lang_code, None)
            dict_categoria[helpers.csc_theme_id(
                option.get('value'))] = label_option
        facet = facets.get('theme_id', None)
        if facet:
            for item in facet.get('items', None):
                item['display_name'] = dict_categoria.get(
                    item.get('name'), item.get('display_name'))
                # Also store the raw facet name under 'class'.
                item['class'] = item.get('name')

        facet = facets.get('frequency_id', None)
        if facet:
            for item in facet.get('items', None):
                #log.info("facet {facet}".format(facet=facet))
                # NOTE(review): assumes the facet name contains a '-'
                # (built as '{value}-{type}'); a name without one raises
                # IndexError on value[1] — confirm upstream guarantees.
                value = item.get('name', '').split('-')
                item['display_name'] = helpers.csc_dataset_display_frequency(
                    value[0], value[1])
        return search_results
コード例 #16
0
def dge_list_reduce_resource_format_label(resources=None, field_name='format'):
    '''
    Given a resource list, collect the translated label of each
    resource's format.

    :param resources: list of resource dicts
    :type resources: list of dicts

    :param field_name: resource key holding the format value
    :type field_name: string

    :rtype: list of label strings
    '''

    dataset = sh.scheming_get_schema('dataset', 'dataset')
    format_field = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                             'format')
    choices = format_field['choices']
    labels = []
    for value in h.dict_list_reduce(resources, field_name):
        label = sh.scheming_choices_label(choices, value)
        if label:
            labels.append(label)
    return labels
コード例 #17
0
def dge_harvest_catalog_show(context, data_dict):
    '''
    Serialize the whole dataset catalog to RDF or CSV and write it to disk.

    Pages through every dataset returned by
    ``_dge_harvest_search_ckan_datasets`` (up to ``data_dict['limit']``,
    -1 meaning no limit), then serializes the collected dataset dicts
    either with ``DGERDFSerializer`` (RDF) or as flattened CSV rows via
    ``losser`` using a column-mapping JSON file, and writes the result to
    a configured file path. ``output`` is built but never returned, so
    the function's result is always None.

    NOTE: this is Python 2 code (``except Exception, e``, ``itervalues``).

    :param context: CKAN action context (checked against the
        'dge_harvest_catalog_show' authorization)
    :param data_dict: parameters; reads 'format' and 'limit', sets 'page'
    '''
    method_log_prefix = '[%s][dge_harvest_catalog_show]' % __name__
    output = None
    try:
        log.debug('%s Init method. Inputs context=%s, data_dict=%s' %
                  (method_log_prefix, context, data_dict))
        ini = datetime.datetime.now()
        toolkit.check_access('dge_harvest_catalog_show', context, data_dict)

        page = 1
        data_dict['page'] = page
        limit = data_dict.get('limit', -1)
        _format = data_dict.get('format')
        # Pick the output file path (and, for CSV, the column-mapping
        # file) from configuration, with hard-coded defaults.
        if _format == RDF_FORMAT:
            filepath = config.get('ckanext.dge_harvest.rdf.filepath',
                                  '/tmp/catalog.rdf')
        elif _format == CSV_FORMAT:
            filepath = config.get('ckanext.dge_harvest.csv.filepath',
                                  '/tmp/catalog.csv')
            columnsfilepath = config.get(
                'ckanext.dge_harvest.csv.columns.filepath',
                '/usr/lib/ckan/default/src/ckanext-dge-harvest/ckanext/dge_harvest/commands/columns.json'
            )
        else:
            filepath = '/tmp/catalog.' + _format
        query = _dge_harvest_search_ckan_datasets(context, data_dict)
        dataset_dicts = query['results']
        total_datasets = query['count']
        log.debug('%s Total_datasets obtenidos en la query: %s' %
                  (method_log_prefix, total_datasets))
        if limit > -1 and limit < total_datasets:
            total_datasets = limit
        num = len(dataset_dicts)
        log.debug('%s Total_datasets a exportar: %s' %
                  (method_log_prefix, total_datasets))

        # Keep fetching further result pages until all requested datasets
        # have been accumulated.
        while (total_datasets > num):
            page = page + 1
            data_dict['page'] = page
            query = _dge_harvest_search_ckan_datasets(context, data_dict)
            dataset_dicts.extend(query['results'])
            total_datasets = query['count']
            num = len(dataset_dicts)
            log.debug('%s Total_datasets obtenidos en la query: %s' %
                      (method_log_prefix, total_datasets))
            log.debug('%s Total_datasets a exportar: %s' %
                      (method_log_prefix, num))

        if _format == RDF_FORMAT:
            serializer = DGERDFSerializer()
            #log.debug("%s DATASET_DICTS = %s" % (method_log_prefix,dataset_dicts))
            output = serializer.serialize_catalog(
                {},
                dataset_dicts,
                _format=data_dict.get('format'),
                pagination_info=None)
        elif _format == CSV_FORMAT and columnsfilepath:
            #log.info('%s Dataset_dicts de partida =%s' % (method_log_prefix, dataset_dicts))
            # Caches to avoid repeated lookups across datasets.
            organizations = {}
            themes = dhh.dge_harvest_dict_theme_option_label()
            spatial_coverages = dhh.dge_harvest_dict_spatial_coverage_option_label(
            )
            _dataset = sh.scheming_get_schema('dataset', 'dataset')
            res_format = sh.scheming_field_by_name(
                _dataset.get('resource_fields'), 'format')
            format_values = res_format['choices']
            formats = {}
            datasets = []
            num = 0
            # Flatten each dataset dict into one row dict for the CSV.
            for dataset in dataset_dicts:
                ds = {}
                # Id (intentionally not exported)
                #ds['id'] = _encode_value(dataset.get('id', None))

                # URL
                ds['url'] = dataset_uri(dataset)

                # Description
                descriptions = _from_dict_to_string(
                    dataset.get(dhc.DS_DESCRIPTION, None))
                ds['description'] = _encode_value(descriptions, True)

                # Title
                titles = _from_dict_to_string(
                    dataset.get(dhc.DS_TITLE_TRANSLATED, None))
                ds['title'] = _encode_value(titles, True)

                # Theme: map each value to its Spanish label.
                theme_values = dataset.get(dhc.DS_THEME, None)
                theme_labels = []
                if theme_values:
                    for value in theme_values:
                        theme = themes.get(value)
                        if theme and theme.get('label'):
                            theme_labels.append(theme.get('label').get('es'))
                    theme_value = _from_list_to_string(theme_labels)
                    ds['theme'] = _encode_value(theme_value, True)

                # Keywords: join tag names with MAIN_SEPARATOR.
                tags = dataset.get(dhc.DS_TAGS)
                value = None
                if tags and len(tags) > 0:
                    for tag in tags:
                        stag = tag.get('name', None)
                        if stag:
                            if value:
                                value = '%s%s%s' % (value, MAIN_SEPARATOR,
                                                    stag)
                            else:
                                value = stag
                    ds['tags'] = _encode_value(value, True)

                # Identifier
                ds['identifier'] = _encode_value(
                    dataset.get('identifier', None), True)

                # Created
                ds['issued_date'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_ISSUED_DATE, None)))

                # Modified
                ds['modified_date'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_MODIFIED_DATE, None)))

                # Accrual periodicity, encoded as '[TYPE]...[VALUE]...'.
                frequency = dataset.get(dhc.DS_FREQUENCY)
                if (frequency):
                    stype = frequency.get('type', '')
                    if stype and len(stype) > 0:
                        stype = 'http://www.w3.org/2006/time#' + stype
                    svalue = frequency.get('value', '')
                    sfrequency = '[TYPE]%s[VALUE]%s' % (stype, svalue)
                    ds['frequency'] = _encode_value(sfrequency, True)

                # Language
                languages = _from_list_to_string(dataset.get(dhc.DS_LANGUAGE))
                ds['language'] = _encode_value(languages, True)

                # Publisher: resolved via the 'organizations' cache.
                publisher = dataset.get(dhc.DS_PUBLISHER, None)
                if publisher:
                    if publisher in organizations:
                        ds['publisher'] = _encode_value(
                            organizations.get(publisher, None), True)
                    else:
                        organization = h.get_organization(publisher, False)
                        if organization:
                            organizations[publisher] = organization.get(
                                'title',
                                organization.get('display_name', None))
                            ds['publisher'] = _encode_value(
                                organizations.get(publisher), True)

                # License
                ds['license_id'] = _encode_value(dataset.get(dhc.DS_LICENSE),
                                                 True)

                # Spatial coverage: map values to their Spanish labels.
                spatial_values = dataset.get(dhc.DS_SPATIAL, None)
                spatial_labels = []
                if spatial_values:
                    for value in spatial_values:
                        spatial = spatial_coverages.get(value)
                        if spatial and spatial.get('label') and spatial.get(
                                'label').get('es'):
                            spatial_labels.append(
                                spatial.get('label').get('es'))
                    spatials = _from_list_to_string(spatial_labels)
                    ds['spatial'] = _encode_value(spatials, True)

                # Temporal coverage: 'from-to' ranges joined by
                # MAIN_SEPARATOR. (Python 2 itervalues.)
                temporal_coverage = dataset.get(dhc.DS_TEMPORAL_COVERAGE)
                if temporal_coverage:
                    value = None
                    for tc in temporal_coverage.itervalues():
                        if tc:
                            tc_from = _from_iso8601_date_to_string(
                                tc.get('from', None))
                            tc_to = _from_iso8601_date_to_string(
                                tc.get('to', None))
                            if tc_from or tc_to:
                                if value:
                                    value = '%s%s%s-%s' % (value,
                                                           MAIN_SEPARATOR,
                                                           (tc_from or ''),
                                                           (tc_to or ''))
                                else:
                                    value = '%s-%s' % ((tc_from or ''),
                                                       (tc_to or ''))
                    ds['coverage_new'] = _encode_value(value, True)

                # Valid
                ds['valid'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_VALID, None)), True)

                # References
                references = _from_list_to_string(
                    dataset.get(dhc.DS_REFERENCE, None))
                ds['references'] = _encode_value(references, True)

                # Normative
                conforms_to = _from_list_to_string(
                    dataset.get(dhc.DS_NORMATIVE, None))
                ds['conforms_to'] = _encode_value(conforms_to, True)

                # Resources: each one flattened into a tagged string.
                resources = dataset.get(dhc.DS_RESOURCES)
                sresources = []
                if resources:
                    for resource in resources:
                        sresource = None
                        if resource:
                            name = _from_dict_to_string(
                                resource.get(dhc.DS_RESOURCE_NAME_TRANSLATED,
                                             None), 'TITLE_')
                            if not name:
                                name = ''
                            url = resource.get(dhc.DS_RESOURCE_ACCESS_URL, '')
                            if url:
                                url = '[ACCESS_URL]%s' % (url)

                            format_value = resource.get(
                                dhc.DS_RESOURCE_FORMAT, None)
                            # 'format' shadows the builtin here; label
                            # lookups are cached in 'formats'.
                            format = None
                            if format_value:
                                if format_value in formats:
                                    format = formats.get(format_value, None)
                                else:
                                    formats[
                                        format_value] = sh.scheming_choices_label(
                                            format_values, format_value)
                                    format = formats.get(format_value, None)
                            if format:
                                format = '[MEDIA_TYPE]%s' % (format)
                            size = resource.get(dhc.DS_RESOURCE_BYTE_SIZE, '')
                            if size:
                                size = '[BYTE_SIZE]%s' % (size)
                            relation = _from_list_to_string(
                                resource.get(dhc.DS_RESOURCE_RELATION, None),
                                SECONDARY_SEPARATOR)
                            relations = ''
                            if relation:
                                relations = '[RELATION]%s' % (relation)
                            sresource = '%s%s%s%s%s' % (name, url, format,
                                                        size, relations)
                            if sresource and len(sresource) > 0:
                                sresources.append(sresource)
                if len(sresources) > 0:
                    value = None
                    for item in sresources:
                        if value:
                            value = '%s%s%s' % (value, MAIN_SEPARATOR, item)
                        else:
                            value = item
                # NOTE(review): when 'sresources' is empty, 'value' still
                # holds whatever an earlier section (tags/temporal) left in
                # it, so ds['resources'] may get stale data — likely a bug.
                ds['resources'] = _encode_value(value, True)

                num = num + 1
                datasets.append(ds)
            #log.debug('%s Datasets con datos a exportar=%s' % (method_log_prefix, datasets))
            log.debug('%s Numero de datasets con datos a exportar...%s' %
                      (method_log_prefix, num))
            output = losser.losser.table(datasets,
                                         columnsfilepath,
                                         csv=True,
                                         pretty=False)

        # NOTE(review): bare except silently ignores write failures; the
        # file may be missing or partial with no error reported. 'file'
        # also shadows the builtin.
        if filepath:
            file = None
            try:
                file = open(filepath, "w")
                file.write(output)
                file.close()
            except:
                if file and not file.closed:
                    file.close()

        end = datetime.datetime.now()
        log.debug(
            "%s Time in serialize %s catalog [%s] with %s datasets ... %s milliseconds"
            % (method_log_prefix, _format, filepath, total_datasets,
               int((end - ini).total_seconds() * 1000)))
    # Python 2 except syntax; errors are logged and output reset to None.
    except Exception, e:
        log.error("%s Exception %s: %s" %
                  (method_log_prefix, type(e).__name__, e))
        output = None
コード例 #18
0
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Given a CKAN dataset dict, creates an RDF graph

        The class RDFLib graph (accessible via `self.g`) should be updated on
        this method

        `dataset_dict` is a dict with the dataset metadata like the one
        returned by `package_show`. `dataset_ref` is an rdflib URIRef object
        that must be used to reference the dataset when working with the graph.
        '''
        method_log_prefix = '[%s][graph_from_dataset]' % type(
            self).__name__
        #log.debug('%s Init method. Inputs dataset_dict=%r, dataset_ref=%r' % (method_log_prefix, dataset_dict, dataset_ref))
        #log.debug('%s Init method. Inputs, dataset_ref=%r' % (method_log_prefix, dataset_ref))
        try:
            g = self.g

            for prefix, namespace in namespaces.iteritems():
                g.bind(prefix, namespace)

            g.add((dataset_ref, RDF.type, DCAT.Dataset))

            # Title
            self._add_translated_triple_field_from_dict(
                dataset_dict, dataset_ref, DCT.title, DS_TITLE_TRANSLATED, None)

            # Description
            self._add_translated_triple_field_from_dict(
                dataset_dict, dataset_ref, DCT.description, DS_DESCRIPTION, None)

            # Theme
            value = self._get_dict_value(dataset_dict, DS_THEME)
            if value:
                themes = dataset_dict.get(EXPORT_AVAILABLE_THEMES, {})
                for theme in value:
                    #self._add_resource_list_triple(dataset_ref, DCAT.theme, value)
                    theme_values = themes.get(theme, {})
                    labels = theme_values.get('label')
                    descriptions = theme_values.get('description')
                    dcat_ap = theme_values.get('dcat_ap')
                    notation = theme_values.get('notation')
                    self._add_resource_list_triple(
                        dataset_ref, DCAT.theme, theme, labels, descriptions, dcat_ap, notation)

            # Tags
            for tag in dataset_dict.get('tags', []):
                self.g.add(
                    (dataset_ref, DCAT.keyword, Literal(tag['name'])))

            # Identifier
            self._add_triple_from_dict(
                dataset_dict, dataset_ref, DCT.identifier, DS_IDENTIFIER, None, False, False)

            # Issued, Modified dates
            self._add_date_triple(dataset_ref, DCT.issued, self._get_value_from_dict(
                dataset_dict, DS_ISSUED_DATE, ['metadata_created']))
            self._add_date_triple(dataset_ref, DCT.modified, self._get_value_from_dict(
                dataset_dict, DS_MODIFIED_DATE, ['metadata_modified']))
            self._add_date_triple(dataset_ref, DCT.valid, self._get_value_from_dict(
                dataset_dict, DS_VALID, None))

            # Accrual periodicity
            frequency = dataset_dict.get(DS_FREQUENCY)
            if frequency:
                ftypes = {'seconds': TIME.seconds,
                          'minutes': TIME.minutes,
                          'hours': TIME.hours,
                          'days': TIME.days,
                          'weeks': TIME.weeks,
                          'months': TIME.months,
                          'years': TIME.years}
                ftype = frequency.get('type')
                fvalue = frequency.get('value')
                if ftype and ftype in ftypes.keys() and fvalue:
                    duration = BNode()
                    frequency = BNode()
                    g.add((frequency, RDF.type, DCT.Frequency))
                    g.add((duration, RDF.type, TIME.DurationDescription))
                    g.add((dataset_ref, DCT.accrualPeriodicity, frequency))
                    g.add((frequency, RDF.value, duration))
                    g.add((duration, ftypes.get(ftype), Literal(
                        fvalue, datatype=XSD.decimal)))

            # Languages
            self._add_triple_from_dict(
                dataset_dict, dataset_ref, DCT.language, DS_LANGUAGE, None, True, False)

            # Publisher
            pub_dir3 = False
            publishers = dataset_dict.get(
                EXPORT_AVAILABLE_PUBLISHERS, {})
            organization_id = dataset_dict.get('owner_org')
            if organization_id in publishers:
                publisher = publishers.get(organization_id)
            else:
                org = h.get_organization(organization_id, False)
                publisher = [None, None, None]
                if org:
                    publisher = [org.get('title'), None, None]
                    if org['extras']:
                        for extra in org.get('extras'):
                            if extra and 'key' in extra and extra['key'] == ORG_PROP_ID_UD_ORGANICA:
                                notation = extra.get('value')
                                if notation and notation != '':
                                    pub_dir3 = True
                                    publisher[1] = PUBLISHER_PREFIX + notation
                                    publisher[2] = notation
                if pub_dir3:
                    publishers[organization_id] = publisher
                    dataset_dict[EXPORT_AVAILABLE_PUBLISHERS] = publishers
                else:
                    #publisher 
                    organizations = cdh.csc_dcat_organizations_available()
                    publisher_ref = config.get('ckanext.csc_dcat.catalog.publisher', None)
                    if publisher_ref and len(publisher_ref.strip()) > 0:
                        publisher_ref = publisher_ref.strip()
                        publisher = [publisher_ref, None, None]
                        s_publisher = publisher_ref.upper().split('/')
                        if s_publisher and len(s_publisher) > 0:
                            organization_minhap = s_publisher[-1]
                            org = organizations.get(organization_minhap, None)
                            if org:
                                publisher = [org[1], PUBLISHER_PREFIX +
                                        organization_minhap, organization_minhap]
            if publisher[1]:
                self._add_resource_list_triple(
                        dataset_ref, DCT.publisher, publisher[1], publisher[0], None, None, publisher[2])
            else:
                g.add((dataset_ref, DCT.publisher, URIRef(publisher[0])))

            # Spatial Coverage
            value = self._get_dict_value(dataset_dict, DS_SPATIAL)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.spatial, value)

            # Temporal
            temporal_coverage = self._get_dataset_value(
                dataset_dict, DS_TEMPORAL_COVERAGE)
            i = 1
            if temporal_coverage:
                for key, value in temporal_coverage.items():
                    if (value):
                        start = end = None
                        if 'from' in value:
                            start = value.get('from')
                        if 'to' in value:
                            end = value.get('to')
                        if start or end:
                            temporal_extent = URIRef(
                                "%s/%s-%s" % (dataset_ref, 'PeriodOfTime', i))
                            g.add(
                                (temporal_extent, RDF.type, DCT.PeriodOfTime))
                            if start:
                                self._add_date_triple(
                                    temporal_extent, SCHEMA.startDate, start)
                            if end:
                                self._add_date_triple(
                                    temporal_extent, SCHEMA.endDate, end)
                            g.add((dataset_ref, DCT.temporal, temporal_extent))
                            i = i+1

            # References
            value = self._get_dict_value(dataset_dict, DS_REFERENCE)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.references, value)

            # Conforms To
            value = self._get_dict_value(dataset_dict, DS_NORMATIVE)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.conformsTo, value)

            # License (dataset license)
            if dataset_dict.get(DS_LICENSE):
                g.add((dataset_ref, DCT.license, URIRef(
                        dataset_dict.get(DS_LICENSE))))

            # Distributions/Resources
            for resource_dict in dataset_dict.get('resources', []):
                uri_resource = '%s/resource/%s' % (
                    dataset_ref, resource_dict['id'])
                distribution = URIRef(uri_resource)
                g.add((dataset_ref, DCAT.distribution, distribution))
                g.add((distribution, RDF.type, DCAT.Distribution))

                # Identifier
                self._add_triple_from_dict(
                    resource_dict, distribution, DCT.identifier, DS_RESOURCE_IDENTIFIER, None, False, False)

                # Title
                self._add_translated_triple_field_from_dict(
                    resource_dict, distribution, DCT.title, DS_RESOURCE_NAME_TRANSLATED, None)

                # License (dataset license)
                if dataset_dict.get(DS_LICENSE):
                    g.add((distribution, DCT.license, URIRef(
                        dataset_dict.get(DS_LICENSE))))

                # Access URL
                if resource_dict.get(DS_RESOURCE_ACCESS_URL):
                    g.add((distribution, DCAT.accessURL, Literal(
                        resource_dict.get(DS_RESOURCE_ACCESS_URL), datatype=XSD.anyURI)))

                # Format
                if resource_dict.get(DS_RESOURCE_FORMAT, None):
                    imt = URIRef("%s/format" % uri_resource)
                    g.add((imt, RDF.type, DCT.IMT))
                    g.add((distribution, DCT['format'], imt))

                    format = resource_dict.get(
                        DS_RESOURCE_FORMAT, None)
                    formats = dataset_dict.get(
                        EXPORT_AVAILABLE_RESOURCE_FORMATS, {})
                    label = None
                    if format and format in formats:
                        label = formats.get(format, None)
                    else:
                        _dataset = sh.scheming_get_schema(
                            'dataset', 'dataset')
                        res_format = sh.scheming_field_by_name(_dataset.get('resource_fields'),
                                                               'format')
                        formats[format] = sh.scheming_choices_label(
                            res_format['choices'], format)
                        label = formats.get(format, None)
                        dataset_dict[EXPORT_AVAILABLE_RESOURCE_FORMATS] = formats
                    if label:
                        g.add((imt, RDFS.label, Literal(label)))
                    g.add((imt, RDF.value, Literal(
                        resource_dict[DS_RESOURCE_FORMAT])))

                # Size
                if resource_dict.get(DS_RESOURCE_BYTE_SIZE):
                    try:
                        g.add((distribution, DCAT.byteSize,
                               Literal(float(resource_dict[DS_RESOURCE_BYTE_SIZE]),
                                       datatype=XSD.decimal)))
                    except (ValueError, TypeError):
                        g.add((distribution, DCAT.byteSize,
                               Literal(resource_dict[DS_RESOURCE_BYTE_SIZE])))
                # Relation
                value = self._get_dict_value(
                    dataset_dict, DS_NORMATIVE)
                if value:
                    self._add_resource_list_triple(
                        distribution, DCT.relation, value)

        except Exception, e:
            log.error("%s [dataset_ref: %s]. Unexpected Error %s: %s" % (
                method_log_prefix, dataset_ref, type(e).__name__, e))
コード例 #19
0
def _get_process_state_field(dataset_type):
    """Return the scheming field definition for ``process_state``.

    Looks up the 'dataset' schema registered for the given dataset type and
    returns the field dict named "process_state" from its dataset fields.

    :param dataset_type: dataset type name passed to scheming_get_schema
    :returns: the field dict, or None when the schema, its
        'dataset_fields' section, or the field itself is missing
    """
    dataset_scheme = h.scheming_get_schema('dataset', dataset_type)
    if not dataset_scheme:
        return None
    # Guard against a schema without a 'dataset_fields' section instead of
    # raising KeyError on a direct subscript.
    fields = dataset_scheme.get('dataset_fields')
    if not fields:
        return None
    return h.scheming_field_by_name(fields, "process_state")
コード例 #20
0
    def before_index(self, data_dict):
        """Enrich the SOLR document with translated labels before indexing.

        Adds/replaces the following keys in ``data_dict``:

        * ``res_format_label`` — human-readable labels for each entry in
          ``res_format``, resolved from the scheming 'format' field choices.
        * ``publisher`` / ``publisher_display_name`` — resolved organization
          id and display name for the ``publisher`` key.
        * ``administration_level`` and ``administration_level_<lang>`` — the
          organization's administration level code and its translations.
        * ``theme``, ``theme_id`` and ``theme_<lang>`` — theme keys, ids and
          per-language labels parsed from the stored ``theme`` string.

        :param data_dict: the SOLR document dict being indexed
        :returns: the (mutated) data_dict
        """
        # Languages the portal indexes; drives every *_<lang> field below.
        _LANGS = ('es', 'en', 'ca', 'eu', 'gl')
        dataset = sh.scheming_get_schema('dataset', 'dataset')

        if 'res_format' in data_dict:
            # Resolve each resource-format key to its label via the scheming
            # 'format' field choices.
            formats = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                                'format')
            data_dict['res_format_label'] = []
            for res_format in data_dict['res_format']:
                res_format_label = sh.scheming_choices_label(
                    formats['choices'], res_format)
                if res_format_label:
                    data_dict['res_format_label'].append(res_format_label)

        if 'publisher' in data_dict:
            organismo = data_dict['publisher']
            # Frontend instances resolve the organization through the custom
            # action; backend instances read it directly via the helper.
            if is_frontend():
                publisher = toolkit.get_action('dge_organization_publisher')(
                    {
                        'model': model
                    }, {
                        'id': organismo
                    })
            else:
                publisher = h.get_organization(organismo)
            data_dict['publisher'] = publisher.get('id')
            data_dict['publisher_display_name'] = publisher.get('display_name')
            administration_level_code = \
                helpers.dge_get_organization_administration_level_code(
                    publisher)
            # Fall back to the default unit for unknown/missing codes.
            if (not administration_level_code
                    or administration_level_code not in TRANSLATED_UNITS):
                administration_level_code = DEFAULT_UNIT
            data_dict['administration_level'] = administration_level_code
            # One translated field per language (replaces five duplicated
            # assignment blocks).
            unit_translations = TRANSLATED_UNITS[administration_level_code]
            for lang in _LANGS:
                data_dict['administration_level_' + lang] = \
                    unit_translations[lang] or ''

        if 'theme' in data_dict:
            # Scheming 'theme' field carries the per-language choice labels.
            categoria = sh.scheming_field_by_name(
                dataset.get('dataset_fields'), 'theme')

            valor_categoria = data_dict['theme']

            # Reset the indexed theme fields before re-populating them.
            data_dict['theme'] = []
            data_dict['theme_id'] = []
            for lang in _LANGS:
                data_dict['theme_' + lang] = []

            # The stored value looks like '["key1", "key2"]'; strip the
            # decoration by hand (legacy format, not guaranteed valid JSON).
            valores = valor_categoria.replace('[', '').replace(']', '')
            for term_categoria in valores.split('", "'):
                clean_term = term_categoria.replace('"', '')
                data_dict['theme'].append(clean_term)
                data_dict['theme_id'].append(helpers.dge_theme_id(clean_term))
                # Look the key up in the schema choices to get its labels.
                for option in categoria.get('choices'):
                    if option['value'] == clean_term:
                        for lang in _LANGS:
                            data_dict['theme_' + lang].append(
                                option['label'][lang])
                        # Choice values are unique in scheming schemas; stop
                        # at the first match.
                        break
        return data_dict