Example #1
0
def get_format_description(context, data_dict):
    """
    Look up the English and French labels for a format code.

    The code is obtained with ``_get_or_bust(data_dict, 'formatCode')``,
    left-padded with zeros to two characters and compared against the
    ``value`` of every choice of the ``ndm_format`` scheming preset.

    :param formatCode: Format Code (i.e. '10')
    :type formatCode: str

    :return: dict with the keys ``format_code``, ``en`` and ``fr``
    :rtype: dict

    :raises: ValidationError when no choice matches the padded code
    """
    wanted = _get_or_bust(data_dict, 'formatCode').zfill(2)
    choices = scheming_helpers.scheming_get_preset(u'ndm_format')['choices']

    for choice in choices:
        if choice['value'] != wanted:
            continue
        return {
            'format_code': choice['value'],
            'en': choice['label'].get('en'),
            'fr': choice['label'].get('fr')
        }

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('formatCode \'{0}\' invalid'.format(wanted),)
    )
Example #2
0
def _smart_add(g, dataset_dict, dataset_ref, type_, key, sc_key=None):
    if sc_key:
        preset = scheming_helpers.scheming_get_preset(sc_key)

    try:
        values = dataset_dict[key + '_translated']
    except KeyError:
        values = dataset_dict[key]

    if not isinstance(values, list):
        values = [values]

    for value in values:
        if sc_key:
            for c in preset['choices']:
                if c['value'] == value:
                    value = c.get('label', value)
                    break

        if isinstance(value, dict):
            for k, v in value.iteritems():
                if not isinstance(v, list):
                    v = [v]

                for vv in v:
                    g.add((dataset_ref, type_, Literal(vv, lang=k)))
        elif value is None:
            return
        else:
            g.add((dataset_ref, type_, Literal(value)))
Example #3
0
    def after_search(self, search_results, search_params):
        """
        Give ``aafc_sector`` facet items human-readable display names.

        Every item of the ``aafc_sector`` search facet gets its
        ``display_name`` replaced by the label that the ``aafc_sector``
        scheming preset defines for the item's ``name``.  The ``tags``
        entry is also removed from ``c.facet_titles`` when possible.

        :param search_results: search result dict; modified in place
        :param search_params: unused

        :return: the same ``search_results`` object
        """
        pr = sh.scheming_get_preset("aafc_sector")
        choices = sh.scheming_field_choices(pr)

        facets = search_results.get('search_facets')
        if not facets:
            return search_results

        if 'aafc_sector' in facets:
            for item in facets['aafc_sector']['items']:
                item['display_name'] = sh.scheming_choices_label(
                    choices, item['name'])

        try:
            # A default keeps a missing 'tags' title from raising KeyError,
            # which the handler below does not catch.
            c.facet_titles.pop('tags', None)
        except (AttributeError, RuntimeError):
            # Best effort: presumably `c` has no facet_titles outside of a
            # page-rendering request -- TODO confirm.
            pass

        return search_results
Example #4
0
def _smart_add(g, dataset_dict, dataset_ref, type_, key, sc_key=None):
    if sc_key:
        preset = scheming_helpers.scheming_get_preset(sc_key)

    try:
        values = dataset_dict[key + '_translated']
    except KeyError:
        values = dataset_dict[key]

    if not isinstance(values, list):
        values = [values]

    for value in values:
        if sc_key:
            for c in preset['choices']:
                if c['value'] == value:
                    value = c.get('label', value)
                    break

        if isinstance(value, dict):
            for k, v in value.iteritems():
                if not isinstance(v, list):
                    v = [v]

                for vv in v:
                    g.add((dataset_ref, type_, Literal(vv, lang=k)))
        elif value is None:
            return
        else:
            g.add((dataset_ref, type_, Literal(value)))
Example #5
0
 def test_scheming_get_preset(self):
     """The ``date`` preset consists of exactly the expected pairs."""
     preset = scheming_get_preset(u'date')
     expected = (
         (u'display_snippet', u'date.html'),
         (u'form_snippet', u'date.html'),
         (u'validators', u'scheming_required isodate'),
     )
     # items() is available on Python 2 and 3; iteritems() is Python 2 only.
     assert_equals(sorted(expected), sorted(preset.items()))
Example #6
0
def get_format_description(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Look up the English and French labels for a format code.

    The code is obtained with ``_get_or_bust(data_dict, 'formatCode')``,
    left-padded with zeros to two characters and compared against the
    ``value`` of every choice of the ``ndm_format`` scheming preset.

    :param formatCode: Format Code (i.e. '10')
    :type formatCode: str

    :return: dict with the keys ``format_code``, ``en`` and ``fr``
    :rtype: dict

    :raises: ValidationError when no choice matches the padded code
    """
    wanted = _get_or_bust(data_dict, 'formatCode').zfill(2)
    choices = scheming_helpers.scheming_get_preset(u'ndm_format')['choices']

    for choice in choices:
        if choice['value'] != wanted:
            continue
        return {
            'format_code': choice['value'],
            'en': choice['label'].get('en'),
            'fr': choice['label'].get('fr')
        }

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('formatCode \'{0}\' invalid'.format(wanted),)
    )
Example #7
0
 def test_scheming_get_preset(self):
     """The ``date`` preset consists of exactly the expected pairs."""
     preset = scheming_get_preset(u'date')
     expected = (
         (u'display_snippet', u'date.html'),
         (u'form_snippet', u'date.html'),
         (u'validators', u'scheming_required isodate convert_to_json_if_date'),
     )
     # items() is available on Python 2 and 3; iteritems() is Python 2 only.
     assert_equals(sorted(expected), sorted(preset.items()))
Example #8
0
def get_last_publish_status(context, data_dict):
    """
    Look up the English and French labels for a publishing status code.

    The code is obtained with
    ``_get_or_bust(data_dict, 'lastPublishStatusCode')``, left-padded with
    zeros to two characters and compared (as unicode) against the
    ``value`` of every choice of the ``ndm_publish_status`` preset.

    :param lastPublishStatusCode: Publishing Status Code (i.e. '10')
    :type lastPublishStatusCode: str
    :return: dict with the keys ``last_publish_status_code``, ``en``
        and ``fr``
    :rtype: dict
    :raises: ValidationError when no choice matches the padded code
    """
    wanted = _get_or_bust(data_dict, 'lastPublishStatusCode').zfill(2)
    choices = scheming_helpers.scheming_get_preset(
        'ndm_publish_status')['choices']

    for choice in choices:
        if unicode(choice['value']) == unicode(wanted):
            return {
                'last_publish_status_code': choice['value'],
                'en': choice['label'].get('en'),
                'fr': choice['label'].get('fr')
            }

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('lastPublishStatusCode: \'{0}\' invalid'.format(wanted),)
    )
Example #9
0
def get_last_publish_status(context, data_dict):
    """
    Look up the English and French labels for a publishing status code.

    The code is obtained with
    ``_get_or_bust(data_dict, 'lastPublishStatusCode')``, left-padded with
    zeros to two characters and compared (as unicode) against the
    ``value`` of every choice of the ``ndm_publish_status`` preset.

    :param lastPublishStatusCode: Publishing Status Code (i.e. '10')
    :type lastPublishStatusCode: str
    :return: dict with the keys ``last_publish_status_code``, ``en``
        and ``fr``
    :rtype: dict
    :raises: ValidationError when no choice matches the padded code
    """
    wanted = _get_or_bust(data_dict, 'lastPublishStatusCode').zfill(2)
    choices = scheming_helpers.scheming_get_preset(
        'ndm_publish_status')['choices']

    for choice in choices:
        if unicode(choice['value']) == unicode(wanted):
            return {
                'last_publish_status_code': choice['value'],
                'en': choice['label'].get('en'),
                'fr': choice['label'].get('fr')
            }

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('lastPublishStatusCode: \'{0}\' invalid'.format(wanted),)
    )
Example #10
0
def get_form_snippet(field):
    """
    Return the name of the form snippet to use for a schema field.

    Resolution order: the field's own ``form_snippet``, then the
    ``form_snippet`` of the scheming preset named by the field's
    ``preset``, then the default ``'text.html'``.  Missing or empty keys
    on the field or the preset fall through to the next step instead of
    raising.

    :param field: schema field definition
    :type field: dict

    :return: snippet file name
    :rtype: str
    """
    snippet = field.get('form_snippet')
    if snippet:
        return snippet

    preset_name = field.get('preset')
    if preset_name:
        # Tolerate an unknown preset (falsy result) and a preset without
        # its own form_snippet.
        preset = sh.scheming_get_preset(preset_name) or {}
        snippet = preset.get('form_snippet')
        if snippet:
            return snippet

    return 'text.html'
Example #11
0
def get_product_url(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Return the fluent URL of one format of a product.

    With a ``formatCode`` the package named
    ``format-<productId>_<formatCode>`` (lower-cased) is searched and the
    ``url`` of the first result is returned.  Without one, all
    ``format-<productId>_*`` packages are searched and the ``ndm_format``
    preset choices are walked in ascending ``weight`` order (default
    ``'99'``); the ``url`` of the first result whose ``format_code``
    equals a choice value is returned, i.e. the preferred (primary) format.

    :param productId:
    :type productId:  str
    :param formatCode:
    :type formatCode: str

    :return: the ``url`` of the selected result; a dict with empty ``en``
        and ``fr`` strings when the result has no ``url`` or no result
        matches any preset choice
    :rtype: dict

    :raises: NotFound when the search returns no results
    """
    product_id = _get_or_bust(data_dict, 'productId')
    format_code = data_dict.get('formatCode')
    lc = ckanapi.LocalCKAN(context=context)

    if format_code:
        # Exact lookup of the requested format.
        query = 'name:format-{product_id}_{format_code}'.format(
            product_id=product_id,
            format_code=format_code
        ).lower()
        matches = lc.action.package_search(q=query).get('results')
        if not matches:
            raise _NotFound('{product_id}: no format {format_code} '
                            'found for product'.format(
                                product_id=product_id,
                                format_code=format_code
                                ))
        return matches[0].get(u'url', {u'en': u'', u'fr': u''})

    # No format requested: fetch every format of the product.
    query = 'name:format-{product_id}_*'.format(
        product_id=product_id
    ).lower()
    matches = lc.action.package_search(q=query).get('results')
    if not matches:
        raise _NotFound(
            '{product_id}: no formats found for product'.format(
                product_id=product_id
            )
        )

    choices = scheming_helpers.scheming_get_preset(
        'ndm_format'
    ).get('choices')

    # Walk the choices by weight so the first hit is the preferred one.
    for choice in sorted(choices, key=lambda k: k.get('weight', '99')):
        for match in matches:
            if match.get(u'format_code') == choice['value']:
                return match.get(u'url', {u'en': u'', u'fr': u''})

    return {u'en': u'', u'fr': u''}
Example #12
0
def get_product_url(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Return the fluent URL of one format of a product.

    With a ``formatCode`` the package named
    ``format-<productId>_<formatCode>`` (lower-cased) is searched and the
    ``url`` of the first result is returned.  Without one, all
    ``format-<productId>_*`` packages are searched and the ``ndm_format``
    preset choices are walked in ascending ``weight`` order (default
    ``'99'``); the ``url`` of the first result whose ``format_code``
    equals a choice value is returned, i.e. the preferred (primary) format.

    :param productId:
    :type productId:  str
    :param formatCode:
    :type formatCode: str

    :return: the ``url`` of the selected result; a dict with empty ``en``
        and ``fr`` strings when the result has no ``url`` or no result
        matches any preset choice
    :rtype: dict

    :raises: NotFound when the search returns no results
    """
    product_id = _get_or_bust(data_dict, 'productId')
    format_code = data_dict.get('formatCode')
    lc = ckanapi.LocalCKAN(context=context)

    if format_code:
        # Exact lookup of the requested format.
        query = 'name:format-{product_id}_{format_code}'.format(
            product_id=product_id,
            format_code=format_code
        ).lower()
        matches = lc.action.package_search(q=query).get('results')
        if not matches:
            raise _NotFound('{product_id}: no format {format_code} '
                            'found for product'.format(
                                product_id=product_id,
                                format_code=format_code
                                ))
        return matches[0].get(u'url', {u'en': u'', u'fr': u''})

    # No format requested: fetch every format of the product.
    query = 'name:format-{product_id}_*'.format(
        product_id=product_id
    ).lower()
    matches = lc.action.package_search(q=query).get('results')
    if not matches:
        raise _NotFound(
            '{product_id}: no formats found for product'.format(
                product_id=product_id
            )
        )

    choices = scheming_helpers.scheming_get_preset(
        'ndm_format'
    ).get('choices')

    # Walk the choices by weight so the first hit is the preferred one.
    for choice in sorted(choices, key=lambda k: k.get('weight', '99')):
        for match in matches:
            if match.get(u'format_code') == choice['value']:
                return match.get(u'url', {u'en': u'', u'fr': u''})

    return {u'en': u'', u'fr': u''}
 def test_scheming_get_preset(self):
     """The ``date`` preset consists of exactly the expected pairs."""
     preset = scheming_get_preset(u"date")
     expected = [
         (u"display_snippet", u"date.html"),
         (u"form_snippet", u"date.html"),
         (
             u"validators",
             u"scheming_required isodate convert_to_json_if_date",
         ),
     ]
     actual = sorted(preset.items())
     assert sorted(expected) == actual
Example #14
0
def get_product_type(context, data_dict):
    # noinspection PyUnresolvedReferences
    """Look up the French and English titles of a product type code.

    The code is obtained with ``_get_or_bust(data_dict, 'productType')``
    and compared (as unicode) against the choices of the ``ndm_product``
    scheming preset.

    Example query:

    .. code:: python

        r = requests.get('/api/3/action/GetProductType?productType=13')
        print(r.json())

    Example response:

    .. code:: json

        {
            "success": true,
            "result": {
                "fr": "Graphique",
                "en": "Chart",
                "product_type_code": "13"
            }
        }


    :param productType: Product Type Code (i.e. '10') or '*' to receive a
        list of all product_types
    :type productType: str
    :return: dict with the keys ``product_type_code``, ``en`` and ``fr``;
        a list of such dicts when ``productType`` is ``'*'``
    :rtype: dict or list
    :raises: ValidationError when no choice matches the code
    """
    def describe(choice):
        # Shape one preset choice into the API response format.
        return {
            'product_type_code': choice['value'],
            'en': choice['label'].get('en'),
            'fr': choice['label'].get('fr')
        }

    wanted = _get_or_bust(data_dict, 'productType')
    choices = scheming_helpers.scheming_get_preset('ndm_product')['choices']

    if wanted == '*':
        return [describe(choice) for choice in choices]

    for choice in choices:
        if unicode(choice['value']) == unicode(wanted):
            return describe(choice)

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('productType: \'{0}\' not valid'.format(wanted),)
    )
Example #15
0
def get_product_type(context, data_dict):
    # noinspection PyUnresolvedReferences
    """Look up the French and English titles of a product type code.

    The code is obtained with ``_get_or_bust(data_dict, 'productType')``
    and compared (as unicode) against the choices of the
    ``ndm_product_type`` scheming preset.

    Example query:

    .. code:: python

        r = requests.get('/api/3/action/GetProductType?productType=13')
        print(r.json())

    Example response:

    .. code:: json

        {
            "success": true,
            "result": {
                "fr": "Graphique",
                "en": "Chart",
                "product_type_code": "13"
            }
        }


    :param productType: Product Type Code (i.e. '10') or '*' to receive a
        list of all product_types
    :type productType: str
    :return: dict with the keys ``product_type_code``, ``en`` and ``fr``;
        a list of such dicts when ``productType`` is ``'*'``
    :rtype: dict or list
    :raises: ValidationError when no choice matches the code
    """
    def describe(choice):
        # Shape one preset choice into the API response format.
        return {
            'product_type_code': choice['value'],
            'en': choice['label'].get('en'),
            'fr': choice['label'].get('fr')
        }

    wanted = _get_or_bust(data_dict, 'productType')
    choices = scheming_helpers.scheming_get_preset(
        'ndm_product_type')['choices']

    if wanted == '*':
        return [describe(choice) for choice in choices]

    for choice in choices:
        if unicode(choice['value']) == unicode(wanted):
            return describe(choice)

    # Reached only when the loop found no matching choice.
    raise logic.ValidationError(
        ('productType: \'{0}\' not valid'.format(wanted),)
    )
Example #16
0
def openness_score(pkg):
    """
    Compute the openness score of a package from its resources.

    The score starts at 1 and is raised to the highest ``openness_score``
    among the ``canada_resource_format`` preset choices whose ``value``
    is the ``format`` of at least one resource.  A resource whose
    ``data_quality`` contains ``data_includes_uris`` raises the score to
    at least 4, and to at least 5 when it also contains
    ``data_includes_links``.

    :param pkg: package dict with a ``resources`` list
    :type pkg: dict

    :return: the openness score
    """
    choices = scheming_get_preset('canada_resource_format')['choices']
    formats_in_use = set(res['format'] for res in pkg['resources'])

    # Best score offered by any format actually present, floor of 1.
    candidates = [1] + [
        choice['openness_score']
        for choice in choices
        if 'openness_score' in choice and choice['value'] in formats_in_use
    ]
    score = max(candidates)

    for res in pkg['resources']:
        quality = res.get('data_quality', [])
        if 'data_includes_uris' not in quality:
            continue
        score = max(4, score)
        # Links only count on top of URIs.
        if 'data_includes_links' in quality:
            score = max(5, score)

    return score
Example #17
0
def openness_score(pkg):
    """
    Compute the openness score of a package from its resources.

    The score starts at 1 and is raised to the highest ``openness_score``
    among the ``canada_resource_format`` preset choices whose ``value``
    is the ``format`` of at least one resource.  A resource whose
    ``data_quality`` contains ``data_includes_uris`` raises the score to
    at least 4, and to at least 5 when it also contains
    ``data_includes_links``.

    :param pkg: package dict with a ``resources`` list
    :type pkg: dict

    :return: the openness score
    """
    choices = scheming_get_preset('canada_resource_format')['choices']
    formats_in_use = set(res['format'] for res in pkg['resources'])

    # Best score offered by any format actually present, floor of 1.
    candidates = [1] + [
        choice['openness_score']
        for choice in choices
        if 'openness_score' in choice and choice['value'] in formats_in_use
    ]
    score = max(candidates)

    for res in pkg['resources']:
        quality = res.get('data_quality', [])
        if 'data_includes_uris' not in quality:
            continue
        score = max(4, score)
        # Links only count on top of URIs.
        if 'data_includes_links' in quality:
            score = max(5, score)

    return score
Example #18
0
    def configure(self, config_):
        """
        Cache schema lookups, load the EEZ list and add admin tabs.

        Sets ``self.dataset_types`` (dataset type -> ``about`` text of
        every scheming dataset schema) and ``self.member_countries``
        (value -> label of the ``member_countries`` preset choices).  If
        ``data/eez.json`` exists next to this module, its ``features`` are
        passed to ``spc_utils.eez.update``.  Finally the "Search Queries"
        and "Ingest" CKAN admin tabs are added.

        :param config_: CKAN configuration object
        """
        schemas = scheming_helpers.scheming_dataset_schemas().values()
        self.dataset_types = OrderedDict(
            (schema['dataset_type'], schema['about']) for schema in schemas
        )

        countries = scheming_helpers.scheming_get_preset(
            'member_countries')['choices']
        self.member_countries = OrderedDict(
            (country['value'], country['label']) for country in countries
        )

        eez_path = os.path.join(os.path.dirname(__file__), 'data/eez.json')
        if os.path.isfile(eez_path):
            with open(eez_path) as eez_file:
                logger.debug('Updating EEZ list')
                features = json.load(eez_file)['features']
                spc_utils.eez.update(features)

        admin_tabs = (
            ('search_queries.index', 'Search Queries'),
            ('ingest.index', 'Ingest'),
        )
        for endpoint, label in admin_tabs:
            toolkit.add_ckan_admin_tab(config_, endpoint, label)
def add_to_search_index(data_dict_id, in_bulk=False):
    """
    Build a Solr document for one dataset and add it to the search index.

    The full record is loaded with ``package_show``.  Nothing is sent to
    Solr unless the ``SEARCH_INTEGRATION_ENABLED_OPTION`` config value is
    truthy.  Any exception raised while building or sending the document
    is logged on the ``ckan`` logger and swallowed.

    :param data_dict_id: id passed to ``package_show`` to load the dataset
    :param in_bulk: when true the document is only added; otherwise any
        existing document with the same id is deleted first and the
        change is committed
    :type in_bulk: bool

    :return: None
    """
    log = logging.getLogger('ckan')
    od_search_solr_url = config.get(SEARCH_INTEGRATION_URL_OPTION, "")
    od_search_enabled = config.get(SEARCH_INTEGRATION_ENABLED_OPTION, False)
    od_search_od_url_en = config.get(
        SEARCH_INTEGRATION_OD_URL_EN_OPTION,
        "https://open.canada.ca/data/en/dataset/")
    od_search_od_url_fr = config.get(
        SEARCH_INTEGRATION_OD_URL_FR_OPTION,
        "https://ouvert.canada.ca/data/fr/dataset/")

    def _parsed(value):
        # Decode JSON-encoded str values; anything else is used as-is.
        return json.loads(value) if isinstance(value, str) else value

    def _labels(preset_name):
        # Label lookup (language -> value -> label) for a preset's choices.
        return scheming_choices_label_by_value(
            scheming_get_preset(preset_name)['choices'])

    # Retrieve the full record - it has additional information including
    # organization title and metadata modified date that are not available
    # in the regular data dict
    portal = LocalCKAN()
    data_dict = portal.action.package_show(id=data_dict_id)

    if not od_search_enabled:
        return
    try:
        subject_codes = _labels('canada_subject')
        type_codes = _labels('canada_resource_related_type')
        collection_codes = _labels('canada_collection')
        juristiction_codes = _labels('canada_jurisdiction')
        resource_type_codes = _labels('canada_resource_type')
        frequency_codes = _labels('canada_frequency')

        # The organization title is expected as "<English> | <French>".
        org_title = data_dict['organization']['title'].split('|')
        owner_org_title_en = org_title[0].strip()
        owner_org_title_fr = org_title[1].strip()

        subjects_en = []
        subjects_fr = []
        for s in _parsed(data_dict['subject']):
            subjects_en.append(subject_codes['en'][s].replace(",", ""))
            subjects_fr.append(subject_codes['fr'][s].replace(",", ""))

        resource_type_en = []
        resource_type_fr = []
        resource_fmt = []
        resource_title_en = []
        resource_title_fr = []
        for r in data_dict['resources']:
            resource_type_en.append(
                resource_type_codes['en'].get(r['resource_type'], ''))
            resource_type_fr.append(
                resource_type_codes['fr'].get(r['resource_type'], ''))
            resource_fmt.append(r['format'])

            resource_name = _parsed(r['name_translated'])
            # 'en-t-fr' is English text translated from French (and
            # 'fr-t-en' the reverse), so each language falls back to its
            # own translated variant, matching the *_xlt_* fields below.
            if 'en' in resource_name:
                resource_title_en.append(resource_name['en'])
            elif 'en-t-fr' in resource_name:
                resource_title_en.append(resource_name['en-t-fr'])
            if 'fr' in resource_name:
                resource_title_fr.append(resource_name['fr'].strip())
            elif 'fr-t-en' in resource_name:
                resource_title_fr.append(resource_name['fr-t-en'].strip())

        notes_translated = _parsed(data_dict['notes_translated'])
        title_translated = _parsed(data_dict['title_translated'])
        od_obj = {
            'portal_type_en_s': type_codes['en'][data_dict['type']],
            'portal_type_fr_s': type_codes['fr'][data_dict['type']],
            'collection_type_en_s': collection_codes['en'][data_dict['collection']],
            'collection_type_fr_s': collection_codes['fr'][data_dict['collection']],
            'jurisdiction_en_s': juristiction_codes['en'][data_dict['jurisdiction']],
            'jurisdiction_fr_s': juristiction_codes['fr'][data_dict['jurisdiction']],
            'owner_org_title_en_s': owner_org_title_en,
            'owner_org_title_fr_s': owner_org_title_fr,
            'subject_en_s': subjects_en,
            'subject_fr_s': subjects_fr,
            'resource_type_en_s': list(set(resource_type_en)),
            'resource_type_fr_s': list(set(resource_type_fr)),
            'update_cycle_en_s': frequency_codes['en'][data_dict['frequency']],
            'update_cycle_fr_s': frequency_codes['fr'][data_dict['frequency']],
            'id_name_s': data_dict['name'],
            'id': data_dict['name'],
            'owner_org_s': data_dict['owner_org'],
            'author_txt': data_dict.get('author', ''),
            # Look keys up in the parsed value, not in the raw (possibly
            # JSON string) field.
            'description_txt_en': notes_translated.get('en', ''),
            'description_txt_fr': notes_translated.get('fr', ''),
            'description_xlt_txt_fr': notes_translated.get('fr-t-en', ''),
            'description_xlt_txt_en': notes_translated.get('en-t-fr', ''),
            'title_en_s': title_translated.get('en', ''),
            'title_fr_s': title_translated.get('fr', ''),
            'title_xlt_fr_s': title_translated.get('fr-t-en', ''),
            'title_xlt_en_s': title_translated.get('en-t-fr', ''),
            'resource_format_s': list(set(resource_fmt)),
            'resource_title_en_s': resource_title_en,
            'resource_title_fr_s': resource_title_fr,
            'last_modified_tdt': parser.parse(
                data_dict['metadata_modified']
            ).replace(microsecond=0).isoformat() + 'Z',
            'ogp_link_en_s': '{0}{1}'.format(od_search_od_url_en, data_dict['name']),
            'ogp_link_fr_s': '{0}{1}'.format(od_search_od_url_fr, data_dict['name']),
        }

        keywords = _parsed(data_dict['keywords'])
        if 'en' in keywords:
            od_obj['keywords_en_s'] = keywords['en']
        elif 'en-t-fr' in keywords:
            od_obj['keywords_en_s'] = keywords['en-t-fr']
        if 'fr' in keywords:
            od_obj['keywords_fr_s'] = keywords['fr']
        elif 'fr-t-en' in keywords:
            od_obj['keywords_fr_s'] = keywords['fr-t-en']

        solr = pysolr.Solr(od_search_solr_url)
        if in_bulk:
            # The caller is presumably responsible for committing a bulk
            # run -- TODO confirm.
            solr.add([od_obj])
        else:
            solr.delete(id=od_obj['id'])
            solr.add([od_obj])
            solr.commit()
    except Exception as x:
        # Exceptions have no .message attribute on Python 3; fall back to
        # the exception itself so the handler cannot raise.
        log.error("Exception: {} {}".format(getattr(x, 'message', x), x.args))
Example #20
0
def add_to_search_index(data_dict_id, in_bulk=False):
    """
    Build a Solr document for one dataset and add it to the search index.

    The full record is loaded with ``package_show``.  Nothing is sent to
    Solr unless the ``SEARCH_INTEGRATION_ENABLED_OPTION`` config value is
    truthy.  Any exception raised while building or sending the document
    is logged on the ``ckan`` logger and swallowed.

    :param data_dict_id: id passed to ``package_show`` to load the dataset
    :param in_bulk: when true the document is only added; otherwise any
        existing document with the same id is deleted first and the
        change is committed
    :type in_bulk: bool

    :return: None
    """
    log = logging.getLogger('ckan')
    od_search_solr_url = config.get(SEARCH_INTEGRATION_URL_OPTION, "")
    od_search_enabled = config.get(SEARCH_INTEGRATION_ENABLED_OPTION, False)
    od_search_od_url_en = config.get(
        SEARCH_INTEGRATION_OD_URL_EN_OPTION,
        "https://open.canada.ca/data/en/dataset/")
    od_search_od_url_fr = config.get(
        SEARCH_INTEGRATION_OD_URL_FR_OPTION,
        "https://ouvert.canada.ca/data/fr/dataset/")

    def _parsed(value):
        # Decode JSON-encoded str values; anything else is used as-is.
        return json.loads(value) if isinstance(value, str) else value

    def _labels(preset_name):
        # Label lookup (language -> value -> label) for a preset's choices.
        return scheming_choices_label_by_value(
            scheming_get_preset(preset_name)['choices'])

    # Retrieve the full record - it has additional information including
    # organization title and metadata modified date that are not available
    # in the regular data dict
    portal = LocalCKAN()
    data_dict = portal.action.package_show(id=data_dict_id)

    if not od_search_enabled:
        return
    try:
        subject_codes = _labels('canada_subject')
        type_codes = _labels('canada_resource_related_type')
        collection_codes = _labels('canada_collection')
        juristiction_codes = _labels('canada_jurisdiction')
        resource_type_codes = _labels('canada_resource_type')
        frequency_codes = _labels('canada_frequency')

        # The organization title is expected as "<English> | <French>".
        org_title = data_dict['organization']['title'].split('|')
        owner_org_title_en = org_title[0].strip()
        owner_org_title_fr = org_title[1].strip()

        subjects_en = []
        subjects_fr = []
        for s in _parsed(data_dict['subject']):
            subjects_en.append(subject_codes['en'][s].replace(",", ""))
            subjects_fr.append(subject_codes['fr'][s].replace(",", ""))

        resource_type_en = []
        resource_type_fr = []
        resource_fmt = []
        resource_title_en = []
        resource_title_fr = []
        for r in data_dict['resources']:
            resource_type_en.append(
                resource_type_codes['en'].get(r['resource_type'], ''))
            resource_type_fr.append(
                resource_type_codes['fr'].get(r['resource_type'], ''))
            resource_fmt.append(r['format'])

            resource_name = _parsed(r['name_translated'])
            if 'en' in resource_name:
                resource_title_en.append(resource_name['en'])
            elif 'en-t-fr' in resource_name:
                resource_title_en.append(resource_name['en-t-fr'])
            if 'fr' in resource_name:
                resource_title_fr.append(resource_name['fr'].strip())
            elif 'fr-t-en' in resource_name:
                resource_title_fr.append(resource_name['fr-t-en'].strip())

        display_options = list(data_dict['display_flags']) \
            if 'display_flags' in data_dict else []

        notes_translated = _parsed(data_dict['notes_translated'])
        title_translated = _parsed(data_dict['title_translated'])
        od_obj = {
            'portal_type_en_s': type_codes['en'][data_dict['type']],
            'portal_type_fr_s': type_codes['fr'][data_dict['type']],
            'collection_type_en_s': collection_codes['en'][data_dict['collection']],
            'collection_type_fr_s': collection_codes['fr'][data_dict['collection']],
            'jurisdiction_en_s': juristiction_codes['en'][data_dict['jurisdiction']],
            'jurisdiction_fr_s': juristiction_codes['fr'][data_dict['jurisdiction']],
            'owner_org_title_en_s': owner_org_title_en,
            'owner_org_title_fr_s': owner_org_title_fr,
            'subject_en_s': subjects_en,
            'subject_fr_s': subjects_fr,
            'resource_type_en_s': list(set(resource_type_en)),
            'resource_type_fr_s': list(set(resource_type_fr)),
            'update_cycle_en_s': frequency_codes['en'][data_dict['frequency']],
            'update_cycle_fr_s': frequency_codes['fr'][data_dict['frequency']],
            'id_name_s': data_dict['name'],
            'id': data_dict['name'],
            'owner_org_s': data_dict['owner_org'],
            'author_txt': data_dict.get('author', ''),
            # Look keys up in the parsed value, not in the raw (possibly
            # JSON string) field.
            'description_txt_en': notes_translated.get('en', ''),
            'description_txt_fr': notes_translated.get('fr', ''),
            'description_xlt_txt_fr': notes_translated.get('fr-t-en', ''),
            'description_xlt_txt_en': notes_translated.get('en-t-fr', ''),
            'title_en_s': title_translated.get('en', ''),
            'title_fr_s': title_translated.get('fr', ''),
            'title_xlt_fr_s': title_translated.get('fr-t-en', ''),
            'title_xlt_en_s': title_translated.get('en-t-fr', ''),
            'resource_format_s': list(set(resource_fmt)),
            'resource_title_en_s': resource_title_en,
            'resource_title_fr_s': resource_title_fr,
            'last_modified_tdt': parser.parse(
                data_dict['metadata_modified']
            ).replace(microsecond=0).isoformat() + 'Z',
            'published_tdt': parser.parse(
                data_dict['date_published']
            ).replace(microsecond=0).isoformat() + 'Z',
            'ogp_link_en_s': '{0}{1}'.format(od_search_od_url_en, data_dict['name']),
            'ogp_link_fr_s': '{0}{1}'.format(od_search_od_url_fr, data_dict['name']),
            'display_options_s': display_options
        }

        if 'en' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(notes_translated['en'].strip(), 'en')
        elif 'en-t-fr' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(notes_translated['en-t-fr'].strip(), 'en')
        if 'fr' in notes_translated:
            od_obj['desc_summary_txt_fr'] = get_summary(notes_translated['fr'].strip(), 'fr')
        elif 'fr-t-en' in notes_translated:
            # Test the same key that is read; testing 'en-t-fr' here raised
            # KeyError or silently skipped the French summary.
            od_obj['desc_summary_txt_fr'] = get_summary(notes_translated['fr-t-en'].strip(), 'fr')

        keywords = _parsed(data_dict['keywords'])
        if 'en' in keywords:
            od_obj['keywords_en_s'] = keywords['en']
        elif 'en-t-fr' in keywords:
            od_obj['keywords_xlt_en_s'] = keywords['en-t-fr']
        if 'fr' in keywords:
            od_obj['keywords_fr_s'] = keywords['fr']
        elif 'fr-t-en' in keywords:
            od_obj['keywords_xlt_fr_s'] = keywords['fr-t-en']

        # NOTE(review): a present value is stored under
        # 'data_series_issue_identification_<lang>' while the '-'
        # placeholder goes to 'data_series_issue_ident_<lang>' -- confirm
        # against the Solr schema that both names are intended.
        if 'data_series_issue_identification' in data_dict:
            if 'en' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_en'] = data_dict['data_series_issue_identification']['en']
            else:
                od_obj['data_series_issue_ident_en'] = '-'
            if 'fr' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_fr'] = data_dict['data_series_issue_identification']['fr']
            else:
                od_obj['data_series_issue_ident_fr'] = '-'
        else:
            od_obj['data_series_issue_ident_en'] = '-'
            od_obj['data_series_issue_ident_fr'] = '-'

        solr = pysolr.Solr(od_search_solr_url)
        if in_bulk:
            # The caller is presumably responsible for committing a bulk
            # run -- TODO confirm.
            solr.add([od_obj])
        else:
            solr.delete(id=od_obj['id'])
            solr.add([od_obj])
            solr.commit()
    except Exception as x:
        # Exceptions have no .message attribute on Python 3; fall back to
        # the exception itself so the handler cannot raise.
        log.error("Exception: {} {}".format(getattr(x, 'message', x), x.args))
Example #21
0
def lookup_label(field_name, field_value, lookup_type):
    """
    Given the name of a field, the value of the field, and the type of lookup
    to perform, resolve the code and return the label.

    :param field_name: The name of the field being resolved (ex: format_code).
    :param field_value: The value of the field being resolved. (ex: '33')
    :param lookup_type: The type of field being resolved (ex: codeset).
        ``'preset'`` looks the value up in the ``ndm_<field_name>`` scheming
        preset, ``'codeset'`` searches codeset datasets, and any other
        non-empty value is used as the dataset type to search.

    :return: The label of the matching choice/dataset. When ``field_value``
        is empty a dict with empty ``en``/``fr`` strings is returned; when
        nothing matches (or ``lookup_type`` is empty) a placeholder dict
        carrying ``found: False`` is returned. For search based lookups the
        title is returned unparsed if it is not a Python literal.
    """
    if not field_value:
        return {u'en': '', u'fr': ''}

    default = {
        u'en': 'label for ' + field_value,
        u'fr': 'description de ' + field_value,
        u'found': False
    }

    if not lookup_type:
        return default

    if lookup_type == 'preset':
        preset = scheming_get_preset('ndm_{f}'.format(f=field_name))
        if not preset:
            return default

        for choice in preset['choices']:
            if choice['value'] == field_value:
                return choice['label']

        return default

    # Both remaining lookups are package searches that differ only in the
    # query they issue.
    if lookup_type == 'codeset':
        query = (u'dataset_type:codeset AND '
                 'codeset_type:{f} AND '
                 'codeset_value:{v}').format(f=field_name, v=field_value)
    else:
        query = (u'dataset_type:{lookup_type} AND '
                 'name:{lookup_type}-{field_value}').format(
                     lookup_type=lookup_type,
                     field_value=field_value.lower())

    # The CKAN client is only needed for the search based lookups, so it is
    # created here rather than before the early returns above.
    lc = ckanapi.LocalCKAN()
    results = lc.action.package_search(q=query)
    if not results[u'count']:
        return default

    result = results[u'results'][-1][u'title']
    if isinstance(result, basestring):
        try:
            result = ast.literal_eval(result)
        except (ValueError, SyntaxError):
            # literal_eval raises ValueError (not only SyntaxError) when the
            # title is not a Python literal, e.g. a plain one-word title;
            # in that case the raw title is returned unchanged.
            pass

    return result
Example #22
0
def lookup_label(field_name, field_value, lookup_type):
    """
    Given the name of a field, the value of the field, and the type of lookup
    to perform, resolve the code and return the label.

    :param field_name: The name of the field being resolved (ex: format_code).
    :param field_value: The value of the field being resolved. (ex: '33')
    :param lookup_type: The type of field being resolved (ex: codeset).
        ``'preset'`` looks the value up in the ``ndm_<field_name>`` scheming
        preset, ``'codeset'`` searches codeset datasets, and any other
        non-empty value is used as the dataset type to search.

    :return: The label of the matching choice/dataset. When ``field_value``
        is empty a dict with empty ``en``/``fr`` strings is returned; when
        nothing matches (or ``lookup_type`` is empty) a placeholder dict
        carrying ``found: False`` is returned. For search based lookups the
        title is returned unparsed if it is not a Python literal.
    """
    if not field_value:
        return {u'en': '', u'fr': ''}

    default = {
        u'en': 'label for ' + field_value,
        u'fr': 'description de ' + field_value,
        u'found': False
    }

    if not lookup_type:
        return default

    if lookup_type == 'preset':
        preset = scheming_get_preset('ndm_{f}'.format(f=field_name))
        if not preset:
            return default

        for choice in preset['choices']:
            if choice['value'] == field_value:
                return choice['label']

        return default

    # Both remaining lookups are package searches that differ only in the
    # query they issue.
    if lookup_type == 'codeset':
        query = (
            u'dataset_type:codeset AND '
            'codeset_type:{f} AND '
            'codeset_value:{v}'
        ).format(
            f=field_name,
            v=field_value
        )
    else:
        query = (
            u'dataset_type:{lookup_type} AND '
            'name:{lookup_type}-{field_value}'
        ).format(
            lookup_type=lookup_type,
            field_value=field_value.lower()
        )

    # The CKAN client is only needed for the search based lookups, so it is
    # created here rather than before the early returns above.
    lc = ckanapi.LocalCKAN()
    results = lc.action.package_search(q=query)
    if not results[u'count']:
        return default

    result = results[u'results'][-1][u'title']
    if isinstance(result, basestring):
        try:
            result = ast.literal_eval(result)
        except (ValueError, SyntaxError):
            # literal_eval raises ValueError (not only SyntaxError) when the
            # title is not a Python literal, e.g. a plain one-word title;
            # in that case the raw title is returned unchanged.
            pass

    return result