Example #1
def validation_get_schema(dataset_type, resource_type):
    schema = scheming_get_dataset_schema(dataset_type)
    for resource in schema.get('resources', []):
        if resource.get("resource_type", "") == resource_type:
            for field in resource.get('resource_fields', []):
                if field['field_name'] == "schema":
                    return validation_load_json_schema(field['field_value'])
Example #2
def get_dataset_schema(context, data_dict):
    """
    Returns full schema definition for the dataset `name`.

    :param name: The name of the schema to return.
    :param expanded: Expand schema presets. Defaults to `True`.
    :returns: A complete dataset schema; raises NotFound (404) if no schema
        by that name exists.
    :rtype: dict
    """
    schema_name = _get_or_bust(data_dict, 'name')

    expanded = data_dict.get('expanded', True)
    if isinstance(expanded, basestring):
        expanded = expanded == 'true'

    result = scheming_helpers.scheming_get_dataset_schema(
        schema_name,
        expanded=expanded
    )

    if result is None:
        raise _NotFound(('no schema by the name {name}'.format(
            name=schema_name
        ),))

    return result
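For context, a minimal usage sketch (not part of the original example), assuming this function is registered as the get_dataset_schema action through the IActions plugin interface:

import ckan.plugins.toolkit as toolkit

# fetch the expanded 'dataset' schema through the registered action
schema = toolkit.get_action('get_dataset_schema')(
    {}, {'name': 'dataset', 'expanded': 'true'})
field_names = [f['field_name'] for f in schema['dataset_fields']]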
Example #3
    def resource_copy(self, id, resource_id):
        context = {'model': model, 'user': toolkit.c.user}

        # Check access
        try:
            toolkit.check_access('package_update', context, {'id': id})
        except toolkit.NotAuthorized:
            message = 'Not authorized to copy resource of dataset "%s"'
            return toolkit.abort(403, message % id)

        # Get resource
        try:
            resource = toolkit.get_action('resource_show')(context, {
                'id': resource_id
            })
        except (toolkit.NotAuthorized, toolkit.ObjectNotFound):
            message = 'Resource "%s" of dataset "%s" not found'
            return toolkit.abort(404, message % (resource_id, id))

        # Extract data
        data = {}
        schema = scheming_get_dataset_schema('dataset')
        for field in schema['resource_fields']:
            # We skip url field (current file)
            if field['field_name'] == 'url':
                continue
            # We skip autogenerated fields
            if field.get('form_snippet', True) is None:
                continue
            if field['field_name'] in resource:
                data[field['field_name']] = resource[field['field_name']]
        data['name'] = '%s (copy)' % resource.get('name')

        return self.new_resource(id, data=data)
Example #5
def _merge_with_schema_default_values(package_type, resource_type, data_dict):
    """
    This function merges the file uploader default resource with the default
    values specified in the ckanext-schemining schema. It allows us to bulk
    upload multiple copies ofa particular resource type e.g. multiple spectrum
    files.
    """
    # If no package_type or resource_type we can't do this.
    if not (package_type and resource_type):
        return data_dict

    schema = scheming_get_dataset_schema(package_type)
    resource_schemas = schema.get("resource_schemas", {})
    resource_schema = resource_schemas.get(resource_type, {})
    file_name = data_dict['name']

    # Step through each field and merge in the default value if it exists.
    for field in resource_schema.get('resource_fields', []):
        if field['field_name'] == 'restricted':
            # TODO: Would be nice if restricted didn't need special treatment
            data_dict["restricted_allowed_users"] = field.get(
                'default_users', "")
            data_dict["restricted_allowed_orgs"] = field.get(
                'default_organizations', "")
        value = field.get('default', field.get('field_value'))
        if value:
            data_dict[field['field_name']] = value

    # Multiple resources with the same name is confusing, so merge in filename
    data_dict['name'] = "{}: {}".format(data_dict.get('name', ""), file_name)
    return data_dict
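For illustration, a hypothetical fragment of a ckanext-scheming resource schema that this merge would consume; the field names and values are invented, but 'default', 'default_users' and 'default_organizations' are exactly the keys read above:

resource_schema = {
    'resource_fields': [
        {'field_name': 'spectrum_type', 'default': 'raman'},
        {'field_name': 'restricted',
         'default_users': 'alice,bob',
         'default_organizations': 'lab-team'},
    ]
}

With a schema like this, every bulk-uploaded resource of that type would receive spectrum_type='raman' plus the restricted defaults.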
Example #6
def spc_thematic_area_list(context, data_dict):
    tk.check_access('spc_thematic_area_list', context, data_dict)
    schema = scheming_helpers.scheming_get_dataset_schema('dataset')
    field = scheming_helpers.scheming_field_by_name(schema['dataset_fields'],
                                                    'thematic_area_string')
    choices = scheming_helpers.scheming_field_choices(field)
    return choices
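The returned choices follow scheming's usual list-of-dicts convention; an invented illustration of the shape:

# [{'value': 'fisheries', 'label': 'Fisheries'},
#  {'value': 'health', 'label': 'Public Health'}]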
Example #7
    def _get_facets_title_with_translation(self):
        '''
        Get the translated facet title
        '''
        # name of additional facets
        additional_facets_name = {}

        # stop if the facet list is empty
        if not self.additional_facets:
            return additional_facets_name

        # get current environment's language
        language = additional_facets_helpers.lang()

        # search and get the translated title for facet
        for facet in self.additional_facets:
            if self.DATASET_FIELD in facet:
                # if both 'facet_name' and 'dataset_type' exist, 'facet_name' wins
                if self.FACET_NAME_FIELD in facet:
                    if type(facet[self.FACET_NAME_FIELD]) is dict:
                        label_array = facet[self.FACET_NAME_FIELD]
                        for key, value in label_array.iteritems():
                            if key == language and value is not None:
                                additional_facets_name[facet[self.DATASET_FIELD]] = value
                    else:
                        additional_facets_name[facet[self.DATASET_FIELD]] = facet[self.FACET_NAME_FIELD]
                else:
                    if facet[self.DATASET_TYPE_FIELD]:
                        from ckanext.scheming import helpers as scheming_helpers
                        package_type = self._get_dataset_type_of_facet(facet[self.DATASET_FIELD])
                        schema = scheming_helpers.scheming_get_dataset_schema(package_type)

                        if schema is None:
                            continue

                        schema_name = facet[self.DATASET_FIELD]
                        # remove prefix from facet name
                        schema_name = schema_name.replace('extras_', '')
                        schema_name = schema_name.replace('res_extras_', '')

                        # switch for dataset or resource
                        if schema_name.startswith('res_') and 'resource_fields' in schema:
                            fields_from_schema = schema['resource_fields']
                        elif 'dataset_fields' in schema:
                            fields_from_schema = schema['dataset_fields']
                        else:
                            continue

                        for field in fields_from_schema:
                            # ckanext-scheming schemas
                            if field['field_name'] == schema_name and 'label' in field:
                                if type(field['label']) is dict:
                                    label_array = field['label']
                                    for key, value in label_array.iteritems():
                                        if key == language and value is not None:
                                            additional_facets_name[facet[self.DATASET_FIELD]] = value
                                else:
                                    additional_facets_name[facet[self.DATASET_FIELD]] = field['label']
        return additional_facets_name
Example #8
    def before_index(self, pkg_dict):
        # Remove internal non-indexable fields and their 'extras_' copies
        for key in ('admin_notes', 'sampling_procedure_notes',
                    'response_rate_notes', 'data_collection_notes',
                    'weight_notes', 'clean_ops_notes', 'data_accs_notes'):
            pkg_dict.pop(key, None)
            pkg_dict.pop('extras_' + key, None)

        # Index labels on selected fields

        schema = scheming_get_dataset_schema('dataset')
        fields = [
            'data_collector', 'keywords', 'sampling_procedure',
            'operational_purpose_of_data', 'data_collection_technique',
            'process_status', 'identifiability'
        ]
        for field in fields:
            if pkg_dict.get(field):
                value = pkg_dict[field]
                try:
                    values = json.loads(pkg_dict[field])
                except ValueError:
                    values = [value]

                out = []

                for schema_field in schema['dataset_fields']:
                    if schema_field['field_name'] == field:
                        for item in values:
                            for choice in schema_field['choices']:
                                if choice['value'] == item:
                                    out.append(choice['label'])
                pkg_dict['vocab_' + field] = out

        return pkg_dict
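To make the label indexing concrete, a hypothetical input/output pair (field and choice values invented):

# Given pkg_dict['sampling_procedure'] == '["probability"]' and a schema
# choice {'value': 'probability', 'label': 'Probability sampling'},
# the indexed dict gains:
#     pkg_dict['vocab_sampling_procedure'] == ['Probability sampling']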
Example #9
def datawa_scheming_select_options(field_name):
    schema = sh.scheming_get_dataset_schema('dataset')
    access_level_options = sh.scheming_field_by_name(
        schema['dataset_fields'], field_name)['choices']
    options = {i['value']: i['label'] for i in access_level_options}
    return options
Example #10
def get_choice_label(name, value, is_resource=False):
    schema = scheming_get_dataset_schema('deposited-dataset')
    fields = schema['resource_fields'] if is_resource else schema[
        'dataset_fields']
    field = scheming_field_by_name(fields, name)
    for choice in field.get('choices', []):
        if choice.get('value') == value:
            return choice.get('label')
    return value
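A usage sketch; the field and choice value are invented and assume the 'deposited-dataset' schema defines such a choice field:

# returns the human-readable label, or the raw value if no choice matches
label = get_choice_label('identifiability', 'anonymized_public')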
Example #12
    def _get_facet_item_label_with_translation(self, dataset_facet_field, default_facet_label):
        '''
        Translate the default label of a facet item. Return the default facet label if no translation is available.
        :param dataset_facet_field: the name of facet field in the dataset
        :param default_facet_label: the default label of the facet item
        '''
        from ckanext.scheming import helpers as scheming_helpers
        package_type = self._get_dataset_type_of_facet(dataset_facet_field)
        schema = scheming_helpers.scheming_get_dataset_schema(package_type)

        # if a facet has both `facet_items` and `dataset_type`, `facet_items` wins
        if self._get_facet_items_of_facet(dataset_facet_field, self.additional_facets) is None:

            # if schema exists
            if schema is not None:
                schema_name = dataset_facet_field
                # remove prefix from facet name
                schema_name = schema_name.replace('extras_', '')
                schema_name = schema_name.replace('res_extras_', '')

                # switch for dataset or resource
                if schema_name.startswith('res_') and 'resource_fields' in schema:
                    fields_from_schema = schema['resource_fields']
                elif 'dataset_fields' in schema:
                    fields_from_schema = schema['dataset_fields']
                else:
                    return self._translate_facet_item_label(dataset_facet_field, default_facet_label)

                for field in fields_from_schema:
                    if field['field_name'] == schema_name:
                        # if item key is given - see facet_list.html
                        if default_facet_label is not None:
                            if 'choices' in field:
                                return scheming_helpers.scheming_choices_label(field['choices'], default_facet_label)
                            elif 'choices_helper' in field:
                                from ckantoolkit import h
                                choices_fn = getattr(h, field['choices_helper'])
                                return scheming_helpers.scheming_choices_label(choices_fn(field), default_facet_label)
                            else:
                                return default_facet_label
                        else:
                            if len(field['label']) > 1 and type(field['label']) is dict:
                                label_array = field['label']
                                language = scheming_helpers.lang()
                                for key, value in label_array.iteritems():
                                    if key == language:
                                        if value is not None:
                                            return value
                                        else:
                                            return default_facet_label
                            if field['label'] is not None:
                                return field['label']
                            else:
                                return default_facet_label

        return self._translate_facet_item_label(dataset_facet_field, default_facet_label)
Example #13
    def metadata_download(self, package_id):
        context = {
            'model': model,
            'session': model.Session,
            'user': p.toolkit.c.user
        }

        data_dict = {
            'id': package_id,
        }
        try:
            result = get_action('package_show')(context, data_dict)
        except (ObjectNotFound, NotAuthorized):
            abort(404, _('Package not found'))

        dataset_fields = helpers.scheming_get_dataset_schema(
            "dataset")['dataset_fields']
        if hasattr(response, u'headers'):
            response.headers['Content-Type'] = 'text/csv'
            response.headers['Content-disposition'] = \
                'attachment; filename="{name}-metadata.csv"'.format(
                    name=package_id)

        f = StringIO.StringIO()
        wr = csv.writer(f, encoding='utf-8')

        header = ['Field', 'Value']
        wr.writerow(header)

        for field in dataset_fields:
            if field['field_name'] == 'tag_string':
                value = self.get_package_tags(result.get('tags'))
                wr.writerow(
                    [helpers.scheming_language_text(field['label']), value])
            elif field['field_name'] == 'owner_org':
                org_alias = str(
                    config.get('ckan.organization_alias', 'Organization'))
                wr.writerow([org_alias, result['organization']['title']])
            elif field['field_name'] == 'groups':
                group_alias = str(config.get('ckan.group_alias',
                                             'Group')) + 's'
                value = self.get_package_groups(result.get('groups'))
                wr.writerow([group_alias, value])
            elif helpers.scheming_field_choices(field):
                value = helpers.scheming_choices_label(
                    helpers.scheming_field_choices(field),
                    result.get(field['field_name']))
                wr.writerow(
                    [helpers.scheming_language_text(field['label']), value])
            else:
                wr.writerow([
                    helpers.scheming_language_text(field['label']),
                    result.get(field['field_name'])
                ])

        return f.getvalue()
Example #14
def get_field_label(name, is_resource=False):
    schema = scheming_get_dataset_schema('deposited-dataset')
    fields = schema['resource_fields'] if is_resource else schema[
        'dataset_fields']
    field = scheming_field_by_name(fields, name)
    if field:
        return field.get('label', name)
    else:
        log.warning(
            'Could not get field {} from deposited-dataset schema'.format(
                name))
Example #15
def _get_group(result):
    if result['type'] != 'subject':
        return

    type_schema = scheming_helpers.scheming_get_dataset_schema(result['type'])

    for field in type_schema['dataset_fields']:
        if field['field_name'] == 'subject_display_code':
            for choice in field['choices']:
                if choice['value'] == result.get('subject_display_code', '-1'):
                    return choice['label']
Example #16
def scheming_dataset_schema_show(context, data_dict):
    '''
    Return the scheming schema for a given dataset type

    :param type: the dataset type
    '''
    t = get_or_bust(data_dict, 'type')
    s = scheming_get_dataset_schema(t)
    if s is None:
        raise ObjectNotFound()
    return s
Example #17
def get_resource_value_label(field_name, resource, dataset_type='dataset'):
    schema = scheming_get_dataset_schema(dataset_type)

    for field in schema['resource_fields']:
        if field['field_name'] == field_name:
            return toolkit.render_snippet(
                'scheming/snippets/display_field.html',
                data=dict(field=field,
                          data=resource,
                          entity_type='dataset',
                          object_type=dataset_type))
Example #18
def get_field_choices(dataset_type):
    from ckanext.scheming import helpers as scheming_helpers
    schema = scheming_helpers.scheming_get_dataset_schema(dataset_type)
    fields = dict()
    for field in schema['dataset_fields']:
        if field.get('choices'):
            choices_new = dict()
            for choice in field.get('choices'):
                choices_new[choice['value']] = (
                    choice['label']['zh_TW']
                    if isinstance(choice['label'], dict)
                    else choice['label'])
            fields[field['field_name']] = choices_new
    return fields
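An invented illustration of the return shape: one nested dict per choice field, mapping each value to its zh_TW (or plain) label:

# {
#     'theme': {'economy': u'經濟', 'health': u'健康'},
#     'status': {'draft': 'Draft', 'published': 'Published'},
# }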
Example #20
    def _pull_facet_title_from_schema(self, package_type, name, item_name,
                                      title):
        schema = scheming_helpers.scheming_get_dataset_schema(package_type)
        language = scheming_helpers.lang()
        schema_name = name
        schema_name = schema_name.replace('res_extras_', '')
        # switch for dataset or resource
        if schema_name.startswith('res_'):
            fields_from_schema = schema['resource_fields']
        else:
            fields_from_schema = schema['dataset_fields']
        for field in fields_from_schema:
            if field['field_name'] == schema_name:
                # if item key is given - see facet_list.html
                if item_name is not None:
                    if 'choices' in field:
                        for entry in field['choices']:
                            if entry['value'] == item_name:
                                if len(entry['label']) > 1 and type(
                                        entry['label']) is dict:
                                    label_array = entry['label']
                                    for key, value in label_array.iteritems():
                                        if key == language:
                                            if value is not None:
                                                return value
                                            else:
                                                return title
                                    if value is not None:
                                        return value
                                    else:
                                        return title
                    else:
                        return title
                else:
                    if len(field['label']) > 1 and type(
                            field['label']) is dict:
                        label_array = field['label']
                        for key, value in label_array.iteritems():
                            if key == language:
                                if value is not None:
                                    return value
                                else:
                                    return title
                        if value is not None:
                            return value
                        else:
                            return title
                    if field['label'] is not None:
                        return field['label']
                    else:
                        return title
        return title
Example #21
def scheming_dataset_schema_show(context, data_dict):
    """
    Return the scheming schema for a given dataset type

    :param type: the dataset type
    :param expanded: True to expand presets (default)
    """
    t = get_or_bust(data_dict, 'type')
    expanded = data_dict.get('expanded', True)
    s = scheming_get_dataset_schema(t, expanded)
    if s is None:
        raise ObjectNotFound()
    return s
Example #23
def get_choice_label(name, value, is_resource=False):
    schema = scheming_get_dataset_schema('deposited-dataset')
    fields = schema['resource_fields'] if is_resource else schema[
        'dataset_fields']
    field = scheming_field_by_name(fields, name)
    if field:
        for choice in field.get('choices', []):
            if choice.get('value') == value:
                return choice.get('label')
        return value
    else:
        log.warning(
            'Could not get field {} from deposited-dataset schema'.format(
                name))
Example #24
def _map_gdl_to_publication(data_dict, obj):
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, str(data_dict['id']))),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [a['name'] for a in data_dict['authors']],
        # "subject": data_dict,
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        # "contributor": [a['name'] for a in data_dict['authors']],
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        # "publication_type": data_dict,
        # "format": data_dict,
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        # "language": data_dict,
        # "relation": data_dict,
        # "spatial": data_dict,
        # "rights": data_dict,
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }
    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        dataset["thematic_area_string"] = thematic_area_mapping.get(
            thematic_area)
    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        choices = sh.scheming_field_by_name(schema['dataset_fields'],
                                            'member_countries')['choices']
        member_country = F.first(
            F.filter(
                F.compose(F.rpartial(contains, related_country),
                          itemgetter('label')), choices))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']
    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + str(
            data_dict['id'])
        res = {'name': data_dict['file'], 'url': res_url}
        res['format'] = splitext(res['name'])[1].lstrip('.')
        dataset['resources'] = [res]

    return dataset
Example #25
def copy(package_type, dataset_id):
    """ Copy a dataset """
    context = {'model': model, 'user': toolkit.c.user}

    # Get organizations
    orgs = toolkit.get_action('organization_list_for_user')(
        context.copy(), {
            'permission': 'create_dataset'
        })
    org_ids = [org['id'] for org in orgs]

    # Check access
    if not orgs:
        message = 'Not authorized to copy dataset "%s"'
        return toolkit.abort(403, message % dataset_id)

    # Get dataset
    try:
        dataset = toolkit.get_action('package_show')(context.copy(), {
            'id': dataset_id
        })
    except (toolkit.NotAuthorized, toolkit.ObjectNotFound):
        message = 'Dataset "%s" not found'
        return toolkit.abort(404, message % dataset_id)

    # Extract data
    data = {}
    schema = scheming_get_dataset_schema('dataset')
    for field in schema['dataset_fields']:
        # We skip name/title
        if field['field_name'] in ['name', 'title']:
            continue
        # We skip autogenerated fields
        if field.get('form_snippet', True) is None:
            continue
        # We skip empty fields
        if field['field_name'] not in dataset:
            continue
        data[field['field_name']] = dataset[field['field_name']]
    data['type'] = 'dataset'
    data['private'] = bool(dataset.get('private'))
    if data.get('owner_org'):
        data['owner_org'] = data['owner_org'] if data[
            'owner_org'] in org_ids else None
    data['original_dataset'] = dataset
    data['tags'] = dataset['tags']

    view = CreateView()
    return view.get(package_type, data=data)
Example #26
    def new(self, ds_id, ds_type):
        new_payload = None

        if 'save' not in request.params:
            lc = ckanapi.LocalCKAN()
            pkg = lc.action.package_show(id=ds_id)
            pkg_id = pkg[PRODUCT_ID]

            parent_schema = scheming_get_dataset_schema(pkg['type'])

            new_payload = {
                'type': ds_type,
                'top_parent_id': pkg.get('top_parent_id', pkg_id) or pkg_id
            }

            id_payload = {
                'parentProductId': pkg['product_id_new'],
                'parentProduct': pkg['product_id_new'],
                'productType': str(
                    parent_schema['dataset_type_code']
                ),
                'productTypeCode': str(
                    parent_schema['dataset_type_code']
                )
            }

            if ds_type == 'format':
                new_payload['parent_id'] = pkg_id
            elif ds_type == 'article':
                pass
            elif ('non_data_product' in parent_schema and
                    parent_schema['non_data_product'] == True):
                if is_legacy_product(pkg[PRODUCT_ID]):
                    new_payload[PRODUCT_ID] = lc.action.GetNextLegacyProductId(
                        **id_payload
                    )
                else:
                    id_payload['subjectCode'] = pkg['subject_codes'][0]
                    new_payload[PRODUCT_ID] = lc.action.GetNextNonDataProductId(
                        **id_payload
                    )
            else:
                new_payload[PRODUCT_ID] = lc.action.GetNextProductId(
                    **id_payload
                )

        return PackageController().new(new_payload)
Example #27
def _normalize_dataset_dict(dataset_dict):
    '''
    Adapt the dataset dict returned by the RDF harvester to the one expected by
    the custom Honduras schema
    '''

    dataset_schema = scheming_get_dataset_schema('dataset')

    field_names = [f['field_name'] for f in dataset_schema['dataset_fields']]

    # Promote extras to root fields
    for name in field_names:
        val = _remove_dataset_dict_extra(dataset_dict, name)
        if val:
            dataset_dict[name] = val

    return dataset_dict
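The helper _remove_dataset_dict_extra is not shown here; a plausible sketch under the assumption that it works on CKAN's standard list-of-dicts 'extras' format (the real implementation may differ):

def _remove_dataset_dict_extra(dataset_dict, key):
    # pop the matching entry from the 'extras' list and return its value
    extras = dataset_dict.get('extras', [])
    for i, extra in enumerate(extras):
        if extra.get('key') == key:
            return extras.pop(i).get('value')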
Example #28
    def get_dataset_fields(self):
        fields = model.Package.get_fields(core_only=True)

        scheming_schema = scheming_get_dataset_schema(
            'dataset')['dataset_fields']

        scheming_fields = []
        for field in scheming_schema:
            scheming_fields.append(field['field_name'].encode('utf8'))

        # Merge in scheming fields, skipping duplicates, since scheming can
        # contain fields with the same names as CKAN core fields
        for field in scheming_fields:
            if field not in fields:
                fields.append(field)

        log.info(fields)
        return fields
Example #29
    def after_search(self, search_results, search_params):
        facets = search_results.get('search_facets')
        results = search_results.get('results')
        if not facets or not results:
            return search_results
        schema = scheming_helpers.scheming_get_dataset_schema(results[0]['type'])
        for facet in facets.values():
            for item in facet['items']:
                field_name = facet['title'].replace('_facet', '')
                field = scheming_helpers.scheming_field_by_name(
                    schema['dataset_fields'], field_name)
                if field and (field.get('choices') or
                              field.get('choices_helper')):
                    choices = scheming_helpers.scheming_field_choices(field)
                    item['display_name'] = scheming_helpers.scheming_choices_label(
                        choices, item['name'])

        return search_results
Example #30
    def _pull_title_from_schema(self, package_type):
        language = scheming_helpers.lang()
        schema = scheming_helpers.scheming_get_dataset_schema(package_type)
        if 'dataset_type_label' in schema:
            if len(schema['dataset_type_label']) > 1 and type(
                    schema['dataset_type_label']) is dict:
                label_array = schema['dataset_type_label']
                for key, value in label_array.iteritems():
                    if key == language:
                        if value is not None:
                            return value
                        else:
                            return schema['dataset_type']
                # fall back to the last label seen if the current
                # language is not present
                if value is not None:
                    return value
                else:
                    return schema['dataset_type']
        else:
            return schema['dataset_type']
Example #31
    def _extract_additional_fields(self, content, package_dict):
        package_dict['thematic_area_string'] = self.topic

        if not package_dict.get('license_id'):
            package_dict['license_id'] = 'notspecified'

        skip_keys = {'set_spec', 'description'}

        for key, value in content.items():
            if key in package_dict or key in skip_keys:
                continue
            if key == 'type':
                key = 'publication_type'
            package_dict[key] = value

        package_dict.pop('extras', None)
        package_dict['type'] = 'publications'
        package_dict.pop('maintainer_email', None)

        coverage = package_dict.pop('coverage', None)
        if coverage:
            schema = scheming_get_dataset_schema('publications')
            field = scheming_field_by_name(schema['dataset_fields'],
                                           'member_countries')
            choices = scheming_field_choices(field)
            package_dict['member_countries'] = [
                choice['value']
                for choice in choices if choice['label'] in coverage
            ] or ['other']
            polygons = [
                t['geometry'] for t in eez.collection
                if any(country in t['properties']['GeoName']
                       for country in coverage)
            ]
            # TODO: for now we are taking first polygon from possible
            # list because of SOLR restriction of spatial field
            # size. In future we may add additional logic here
            if polygons:
                package_dict['coverage'] = json.dumps(polygons[0])

        return package_dict
Example #32
    def new(self, ds_id, ds_type):
        new_payload = None

        if 'save' not in request.params:
            lc = ckanapi.LocalCKAN()
            pkg = lc.action.package_show(id=ds_id)
            pkg_id = pkg[PRODUCT_ID]

            parent_schema = scheming_get_dataset_schema(pkg['type'])

            new_payload = {
                'type': ds_type,
                'top_parent_id': pkg.get('top_parent_id', pkg_id) or pkg_id
            }

            id_payload = {
                'parentProductId': pkg['product_id_new'],
                'parentProduct': pkg['product_id_new'],
                'productType': str(
                    parent_schema['dataset_type_code']
                ),
                'productTypeCode': str(
                    parent_schema['dataset_type_code']
                )
            }

            if ds_type == 'format':
                new_payload['parent_id'] = pkg_id
            elif ds_type == 'issue':
                issue_number = next_issue_number(pkg_id)
                issue_id = u'{pid}{issue_number}'.format(
                    pid=pkg_id,
                    issue_number=issue_number
                )
                new_payload['product_type_code'] = pkg.get('product_type_code')
                new_payload['issue_number'] = issue_number
                new_payload['product_id_new'] = issue_id
                new_payload['name'] = u'issue-{issue_id}'.format(
                    issue_id=issue_id
                )
                pass
            elif ds_type == 'article':
                article_id = next_article_id(
                    pkg.get('top_parent_id', pkg_id) or pkg_id,
                    pkg.get('issue_number')
                )
                new_payload['product_type_code'] = pkg.get('product_type_code')
                new_payload['issue_number'] = pkg.get('issue_number')
                new_payload['product_id_new'] = article_id
                new_payload['name'] = u'article-{article_id}'.format(
                    article_id=article_id
                )
                pass
            elif ('non_data_product' in parent_schema and
                    parent_schema['non_data_product'] == True):
                if is_legacy_product(pkg[PRODUCT_ID]):
                    new_payload[PRODUCT_ID] = lc.action.GetNextLegacyProductId(
                        **id_payload
                    )
                else:
                    id_payload['subjectCode'] = pkg['subject_codes'][0]
                    new_payload[PRODUCT_ID] = lc.action.GetNextNonDataProductId(
                        **id_payload
                    )
            else:
                new_payload[PRODUCT_ID] = lc.action.GetNextProductId(
                    **id_payload
                )

        return PackageController().new(new_payload)
Example #33
def _get_dataset_schema():

    return scheming_get_dataset_schema('dataset')
Example #34
    def import_stage(self, harvest_object):
        log.debug('In PRDREngergyResourcesHarvester import_stage')
        if not harvest_object:
            log.error('No harvest object received')
            return False

        self._set_config(harvest_object.job.source.config)

        if self.force_import:
            status = 'change'
        else:
            status = self._get_object_extra(harvest_object, 'status')
        if status == 'delete':
            context = {
                'model': model,
                'session': model.Session,
                'user': self._get_user_name()
            }

            p.toolkit.get_action('package_delete')(
                context, {
                    'id': harvest_object.package_id
                })
            log.info('Deleted package {0} with guid {1}'.format(
                harvest_object.package_id, harvest_object.guid))

            return True
        if harvest_object.content is None:
            self._save_object_error(
                'Empty content for object %s' % harvest_object.id,
                harvest_object, 'Import')
            return False

        # Get the last harvested object (if any)
        previous_object = model.Session.query(HarvestObject) \
            .filter(HarvestObject.guid == harvest_object.guid) \
            .filter(HarvestObject.current == True) \
            .first()

        # Flag previous object as not current anymore
        if previous_object and not self.force_import:
            previous_object.current = False
            previous_object.add()

        package_dict = self._get_package_dict(harvest_object)
        if not package_dict:
            return False

        if not package_dict.get('name'):
            package_dict['name'] = \
                self._get_package_name(harvest_object, package_dict['title'])

        # copy across resource ids from the existing dataset, otherwise they'll
        # be recreated with new ids

        if status == 'change':
            existing_dataset = self._get_existing_dataset(harvest_object.guid)
            if existing_dataset:
                copy_across_resource_ids(existing_dataset, package_dict)

        # Allow custom harvesters to modify the package dict before creating
        # or updating the package
        package_dict = self.modify_package_dict(package_dict, harvest_object)
        # Unless already set by an extension, get the owner organization (if
        # any) from the harvest source dataset
        if not package_dict.get('owner_org'):
            source_dataset = model.Package.get(harvest_object.source.id)
            if source_dataset.owner_org:
                package_dict['owner_org'] = source_dataset.owner_org

        if not package_dict.get('license_id'):
            package_dict['license_id'] = 'notspecified'

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        context = {
            'user': self._get_user_name(),
            'return_id_only': True,
            'ignore_auth': True,
        }

        package_schema = scheming_get_dataset_schema('dataset')
        field = scheming_field_by_name(package_schema['dataset_fields'],
                                       'member_countries')
        choices = scheming_field_choices(field)

        mem_temp_list = [
            x for x in package_dict['member_countries'] if x is not None
        ]
        package_dict['member_countries'] = [
            choice['value']
            for choice in choices if choice['label'] in mem_temp_list
        ] or ['other']

        polygons = [
            t['geometry'] for t in eez.collection
            if any(country in t['properties']['GeoName']
                   for country in mem_temp_list)
        ]
        # TODO: for now we are taking first polygon from possible
        # list because of SOLR restriction of spatial field
        # size. In future we may add additional logic here
        if polygons:
            package_dict['coverage'] = json.dumps(polygons[0])

        if status == 'new':
            # context['schema'] = package_schema

            # We need to explicitly provide a package ID
            package_dict['id'] = unicode(uuid.uuid4())
            # package_schema['id'] = [unicode]

            # Save reference to the package on the object
            harvest_object.package_id = package_dict['id']
            harvest_object.add()

            # Defer constraints and flush so the dataset can be indexed with
            # the harvest object id (on the after_show hook from the harvester
            # plugin)
            model.Session.execute(
                'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
            model.Session.flush()
            package_id = \
                p.toolkit.get_action('package_create')(context, package_dict)
            log.info('Created dataset with id %s', package_id)

        elif status == 'change':
            package_dict['id'] = harvest_object.package_id
            try:
                package_id = \
                    p.toolkit.get_action('package_update')(context, package_dict)
                log.info('Updated dataset with id %s', package_id)
            except NotFound:
                log.info(
                    'Update returned NotFound, trying to create new Dataset.')
                if not harvest_object.package_id:
                    package_dict['id'] = unicode(uuid.uuid4())
                    harvest_object.package_id = package_dict['id']
                    harvest_object.add()
                else:
                    package_dict['id'] = harvest_object.package_id
                package_id = \
                    p.toolkit.get_action('package_create')(context, package_dict)
                log.info('Created dataset with id %s', package_id)
        model.Session.commit()
        stored_package = p.toolkit.get_action('package_show')(context.copy(), {
            'id': package_id
        })
        for res in stored_package.get('resources', []):
            p.toolkit.get_action('resource_create_default_resource_views')(
                context.copy(), {
                    'package': stored_package,
                    'resource': res
                })

        return True
Example #35
    def before_index(self, data_dict):
        """
        customize data sent to solr
        """
        bogus_date = datetime.datetime(1, 1, 1)
        dataset_schema = scheming_get_dataset_schema(
            data_dict.get('type', 'unknown')
        )
        if dataset_schema is None:
            raise ValidationError(
                'Found no schema for the following dataset:\n{dump}'.format(
                    dump=json.dumps(data_dict, indent=2)
                )
            )

        # iterate through dataset fields defined in schema
        field_schema = dict()
        for dataset_field in dataset_schema['dataset_fields']:
            field_schema[dataset_field['field_name']] = dataset_field

        index_data_dict = {}
        authors = []
        # drop extras fields
        for dict_key in data_dict:
            if not dict_key.startswith('extras_'):
                index_data_dict[dict_key] = data_dict[dict_key]
        # iterate through validated data_dict fields and modify as needed
        validated_data_dict = json.loads(data_dict['validated_data_dict'])
        for item in validated_data_dict.keys():
            value = validated_data_dict[item]
            if not value and item in index_data_dict:
                index_data_dict.pop(item)
                continue
            fs = field_schema.get(item, None)
            # ignore all fields not currently in the schema
            if not fs:
                continue

            field_type = fs.get('schema_field_type', 'string')
            multivalued = fs.get('schema_multivalued', False)

            if field_type == 'fluent':
                for key in value.keys():
                    label = u'{item}_{key}'.format(
                        item=item,
                        key=key
                    )
                    index_data_dict[label] = value[key]

            # for code type, the en/fr labels need to be looked up
            # and sent to Solr
            elif field_type == 'code':
                lookup_type = fs.get('lookup', '')
                if lookup_type == 'codeset':
                    lookup = fs.get('codeset_type', '')
                elif lookup_type == 'preset':
                    lookup = fs.get('preset', '')[4:]
                else:
                    lookup = fs.get('lookup', '')
                if lookup and value:
                    label_en = u'{item}_desc_en'.format(
                        item=item
                    )
                    label_fr = u'{item}_desc_fr'.format(
                        item=item
                    )
                    if multivalued:
                        desc_en = []
                        desc_fr = []
                        for v in value:
                            if not v:
                                continue
                            desc = lookup_label(lookup, v, lookup_type)
                            desc_en.append(desc[u'en'])
                            desc_fr.append(desc[u'fr'])

                        index_data_dict[str(item)] = value

                        index_data_dict[label_en] = desc_en
                        index_data_dict[label_fr] = desc_fr
                    else:
                        desc = lookup_label(lookup, value, lookup_type)
                        index_data_dict[label_en] = desc[u'en']
                        index_data_dict[label_fr] = desc[u'fr']
            elif field_type == 'date':
                if value:
                    try:
                        date = parse(value, default=bogus_date)
                        if date != bogus_date:
                            index_data_dict[item] = date.isoformat() + 'Z'
                    except ValueError:
                        continue
            elif item.endswith('_authors'):
                index_data_dict[str(item)] = value
                authors.extend(value)
            else:  # all other field types
                # multivalued and single-valued fields are indexed identically
                index_data_dict[str(item)] = value

            if authors:
                index_data_dict['authors'] = authors
                index_data_dict['authors_initials'] = list(
                    set(
                        [strip_accents(i[0]).upper() for i in authors]
                    )
                )

        return index_data_dict
Example #36
    def before_index(self, data_dict):
        """
        customize data sent to solr

        :param data_dict:
        :type data_dict: dict

        :returns: dict
        """
        dataset_schema = scheming_get_dataset_schema(data_dict.get('type'))
        if dataset_schema is None:
            raise ValidationError((_(
                'Found no schema for the following dataset:\n{dump}'.format(
                    dump=json.dumps(data_dict, indent=2, sort_keys=True)
                )
            ),))

        field_schema = dict(
            (s['field_name'], s) for s in dataset_schema['dataset_fields']
        )

        index_data_dict = data_dict.copy()
        for k in data_dict:
            if k.startswith(u'extras_'):
                index_data_dict.pop(k, None)

        authors = []
        default_date = datetime(1, 1, 1, 8, 30, 0, 0)

        validated_data_dict = json.loads(data_dict['validated_data_dict'])

        name = validated_data_dict.get(u'name')

        # append dguids from the datastore
        if validated_data_dict.get(u'product_id_new'):
            index_data_dict[u'dguid_codes'] = []
            for dguid_pkg_id in geo.get_geodescriptors_for_package(
                validated_data_dict[u'product_id_new']):
                index_data_dict[u'dguid_codes'].append(
                        helpers.get_dguid_from_pkg_id(dguid_pkg_id))
            # strip the vintages from dguids to get geodescriptors
            index_data_dict[u'geodescriptor_codes'] = \
                [g[4:] if is_dguid(g) else g
                 for g in index_data_dict[u'dguid_codes'] if g]

        for item, value in validated_data_dict.iteritems():
            fs = field_schema.get(item)

            # Do not index any field that is not currently in the schema.
            if not fs:
                continue

            field_type = fs.get('schema_field_type', 'string')
            # TODO: we're not using the multivalued schema field.  Drop it?
            multivalued = fs.get('schema_multivalued', False)

            # Legacy issues numbers are non-numeric, which is problematic
            # for sorting and external tools. We can't just use a Solr
            # <copyTo> directive, as it'll fail entirely on a bad value.
            if item == 'issue_number':
                if value.isdigit():
                    index_data_dict['issue_number_int'] = int(value)

            # Fluent (multilingual) fields are really dictionaries, where
            # each key is the ISO language code, and the value the translated
            # text. We need to unpack these into individual solr fields
            # for per-language search.
            if field_type == 'fluent':
                if isinstance(value, dict):
                    index_data_dict.update(
                        (u'{0}_{1}'.format(item, k), v)
                        for k, v in value.iteritems()
                    )
                else:
                    raise ValidationError((_(
                        '{name}: Expecting a fluent dict for {item}, '
                        'instead got {value!r}'.format(
                            name=name,
                            item=item,
                            value=value
                        )
                    ), ))

            # Numeric foreign keys that need to be looked up to retrieve
            # their multilingual labels for searching.
            elif field_type == u'code':
                index_data_dict[unicode(item)] = value

                # These codes can refer to a codeset (a dataset of type
                # 'codeset' with a particular key), a preset (a hardcoded
                # value in a Scheming schema), or another dataset (lookup).
                lookup_type = fs.get(u'lookup', '')
                if lookup_type == u'codeset':
                    lookup = fs.get(u'codeset_type', '')
                elif lookup_type == u'preset':
                    lookup = fs.get(u'preset', '')[4:]
                else:
                    lookup = fs.get(u'lookup', '')

                if not lookup:
                    raise ValidationError((_(
                        '{name}: unable to determine lookup '
                        'for {item}'.format(
                            name=name,
                            item=item
                        )
                    ), ))

                if isinstance(value, list):
                    for value_to_lookup in value:
                        if not value_to_lookup:
                            continue

                        desc = lookup_label(
                            lookup,
                            value_to_lookup,
                            lookup_type
                        )

                        for k, v in desc.iteritems():
                            if v and not k == u'found':
                                n = u'{item}_desc_{key}'.format(
                                    item=item,
                                    key=k
                                )
                                index_data_dict.update(
                                    {n: index_data_dict.get(n, []) + [v]}
                                )

                else:
                    desc = lookup_label(lookup, value, lookup_type)

                    index_data_dict.update((
                        u'{item}_desc_{key}'.format(
                            item=item,
                            key=k
                        ), v)
                        for k, v in desc.iteritems() if v and not k == u'found'
                    )
                if item == u'geodescriptor_codes':
                    index_data_dict[u'dguid_codes'] = \
                        list(index_data_dict[u'geodescriptor_codes'])
            elif field_type == 'date':
                try:
                    date = parse(value, default=default_date)
                    index_data_dict[unicode(item)] = unicode(
                        date.isoformat()[:19] + u'Z'
                    )
                except ValueError:
                    continue
            elif item.endswith('_authors'):
                index_data_dict[unicode(item)] = value
                authors.extend(value)
            else:
                index_data_dict[unicode(item)] = value

            if authors:
                index_data_dict['authors'] = authors
                index_data_dict['authors_initials'] = list(
                    set(
                        [strip_accents(i[0]).upper() for i in authors]
                    )
                )

        return index_data_dict
Example #37
def qdes_datasets_with_empty_recommended_fields(context, config={}):
    u"""
    List of all datasets that have no values against recommended metadata fields.
    """
    # Check access for sysadmin user's only
    check_access('config_option_update', context, None)

    # Get org_id config.
    org_id = config.get('org_id', None)

    # Get list of recommended fields.
    dataset_scheme = scheming_helpers.scheming_get_dataset_schema('dataset')
    dataset_recommended_fields = qdes_logic_helpers \
        .qdes_get_recommended_dataset_fields(dataset_scheme, 'dataset_fields')
    dataset_resource_recommended_fields = qdes_logic_helpers \
        .qdes_get_recommended_dataset_fields(dataset_scheme, 'resource_fields')

    # Build rows.
    rows = []
    i = 0
    limit = 10
    has_result = True
    point_of_contacts = {}
    while has_result:
        packages = get_action('current_package_list_with_resources')(
            context, {
                'limit': limit,
                'offset': i
            })
        if not packages:
            has_result = False
        else:
            i += limit

        for package in packages:
            if package.get('state') == 'active':
                # Load and cache point of contacts.
                contact_point_pos = package.get('contact_point', None)
                if contact_point_pos not in point_of_contacts:
                    point_of_contacts[contact_point_pos] = qdes_logic_helpers \
                        .get_point_of_contact(context, contact_point_pos) if contact_point_pos else {}

                # Get package organization.
                pkg_org = package.get('organization')

                # Filter based on org_id or package type.
                if (org_id and pkg_org.get('id') != org_id
                    ) or package.get('type') == 'dataservice':
                    continue

                # Get missing value fields.
                missing_values = qdes_logic_helpers \
                    .qdes_check_recommended_field_value(package, dataset_recommended_fields)

                # Get contact point.
                contact_point = point_of_contacts.get(contact_point_pos)

                # Build row.
                if missing_values:
                    row = qdes_logic_helpers \
                        .qdes_empty_recommended_field_row(package, contact_point, missing_values)
                    rows.append(row)

                # Check dataset resource metadata fields.
                for resource in package.get('resources', []):
                    # Get missing value fields.
                    missing_values = qdes_logic_helpers \
                        .qdes_check_recommended_field_value(resource, dataset_resource_recommended_fields)

                    # Build row.
                    if missing_values:
                        row = qdes_logic_helpers \
                            .qdes_empty_recommended_field_row(package, contact_point, missing_values, resource)
                        rows.append(row)

    return rows
Example #38
    def before_index(self, pkg_dict):

        # Remove internal non-indexable fields and their 'extras_' copies
        for key in ('admin_notes', 'sampling_procedure_notes',
                    'response_rate_notes', 'data_collection_notes',
                    'weight_notes', 'clean_ops_notes', 'data_accs_notes'):
            pkg_dict.pop(key, None)
            pkg_dict.pop('extras_' + key, None)

        # Index labels on selected fields

        schema = scheming_get_dataset_schema('dataset')
        fields = [
            'data_collector', 'keywords', 'sampling_procedure',
            'operational_purpose_of_data', 'data_collection_technique',
            'process_status', 'identifiability'
        ]
        for field in fields:
            if pkg_dict.get(field):
                value = pkg_dict[field]
                try:
                    values = json.loads(pkg_dict[field])
                except ValueError:
                    values = [value]

                out = []

                for schema_field in schema['dataset_fields']:
                    if schema_field['field_name'] == field:
                        for item in values:
                            for choice in schema_field['choices']:
                                if choice['value'] == item:
                                    out.append(choice['label'])
                pkg_dict['vocab_' + field] = out

        # Index additional data for deposited dataset

        if pkg_dict.get('type') == 'deposited-dataset':
            # curator
            curator_id = pkg_dict.get('curator_id')
            if curator_id:
                try:
                    curator = toolkit.get_action('user_show')(
                        {
                            'ignore_auth': True
                        }, {
                            'id': curator_id
                        })
                    pkg_dict['curator_display_name'] = curator.get(
                        'display_name')
                except toolkit.ObjectNotFound:
                    pass
            # depositor
            depositor_id = pkg_dict.get('creator_user_id')
            if depositor_id:
                try:
                    depositor = toolkit.get_action('user_show')(
                        {
                            'ignore_auth': True
                        }, {
                            'id': depositor_id
                        })
                    pkg_dict['depositor_display_name'] = depositor.get(
                        'display_name')
                except toolkit.ObjectNotFound:
                    pass
            # data-container
            owner_org_dest_id = pkg_dict.get('owner_org_dest')
            if owner_org_dest_id:
                try:
                    owner_org_dest = toolkit.get_action('organization_show')(
                        {
                            'ignore_auth': True
                        }, {
                            'id': owner_org_dest_id
                        })
                    pkg_dict[
                        'owner_org_dest_display_name'] = owner_org_dest.get(
                            'display_name')
                except toolkit.ObjectNotFound:
                    pass

        return pkg_dict