Ejemplo n.º 1
0
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Replace the geodescriptors of a product and recompute its geo levels.

    The product is located with a ``package_search`` on ``product_id_new``.
    Its ``geolevel_codes`` become the distinct five-character prefixes of
    the supplied codes. For product types listed in ``VALID_DATA_TYPES``
    the codes are written through the ``geo`` service (after clearing the
    ones already recorded); for every other type they are stored directly
    on the package as ``geodescriptor_codes``.

    :param productId: ID of the product to update.
    :type productId: str

    :param dguids: Geo codes, either as a list or as a single
        semicolon-separated string.
    :type dguids: list of strings or str

    :return: the package as returned by ``package_show`` after the update
    :rtype: dict

    :raises _ValidationError: if no record, or more than one record,
        matches ``productId``.
    """
    target_id = _get_or_bust(data_dict, 'productId')
    geo_codes = _get_or_bust(data_dict, 'dguids')

    local_ckan = ckanapi.LocalCKAN(context=context)

    # A plain string is treated as a ';'-delimited list of codes.
    if isinstance(geo_codes, basestring):
        geo_codes = [part.strip() for part in geo_codes.split(';')]

    found = local_ckan.action.package_search(
        q='product_id_new:{0}'.format(target_id)
    )
    hits = found['count']

    # Exactly one matching record is required.
    if hits == 0:
        raise _ValidationError(('Record not found.',))
    if hits > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    package = found['results'][0]
    # The geo level is taken to be the first five characters of each code.
    package['geolevel_codes'] = list({code[:5] for code in geo_codes})

    if package['product_type_code'] not in VALID_DATA_TYPES:
        # Non-data products carry their geodescriptors on the package
        # itself.
        package['geodescriptor_codes'] = geo_codes
    else:
        # Data products can have tens of thousands of geodescriptors, so
        # they are kept in the geodescriptor service rather than on the
        # package.
        package_pid = package['product_id_new']
        geo.clear_geodescriptors_for_package(package_pid)
        for code in geo_codes:
            geo.update_relationship(package_pid, code)

    # TODO: Check the results?
    local_ckan.action.package_update(**package)

    return local_ckan.action.package_show(id=package['id'])
Ejemplo n.º 2
0
def register_data_product(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Register a new data product under the cube given by `parentProductId`,
    using the requested `productType`. Fields that the destination schema
    shares with the cube record are copied from the cube.

    .. note::

        An earlier revision of this docstring stated that a cube missing
        its English and French titles would be updated. Nothing in this
        function does that directly; confirm whether ``GetCube`` does.

    .. note::

        As the schemas for tables, indicators, charts, and maps do not
        yet exist, this method is not thoroughly tested.

    :param parentProductId: 8-digit cube id
    :type parentProductId: str
    :param productType: product type code; left-padded with zeros to two
        characters before use
    :type productType: str
    :param productTitle: EN/FR title dictionary
    :type productTitle: dict
    :param geodescriptor_codes: (optional) geo codes to associate with the
        new product through the ``geo`` service
    :type geodescriptor_codes: list of strings
    :return: dict with the new product id under ``product_id_new``
    :rtype: dict
    :raises _ValidationError: if `productType` is the cube type or is not
        a key of ``VALID_DATA_TYPES``
    """
    # Cubes are registered through their own endpoint and are rejected
    # here.
    # TODO: Can we pull this from somewhere? Presets.yaml does not
    #       necessarily have the exact schema name in ndm_product_type.
    cube_type_code = u'10'

    parent_id = _get_or_bust(data_dict, 'parentProductId')
    product_title = _get_or_bust(data_dict, 'productTitle')
    type_code = _get_or_bust(data_dict, 'productType').zfill(2)

    if type_code == cube_type_code:
        raise _ValidationError(
            ('Please use RegisterCube to register a cube',)
        )
    if type_code not in VALID_DATA_TYPES:
        raise _ValidationError(
            ('Invalid data productType, only data products may be registered '
             'with this service',)
        )

    local_ckan = ckanapi.LocalCKAN(context=context)
    cube = local_ckan.action.GetCube(cubeId=parent_id)

    target_schema = local_ckan.action.GetDatasetSchema(
        name=VALID_DATA_TYPES[type_code]
    )

    # Start from the fields the destination schema has in common with the
    # cube record.
    schema_field_names = (
        entry['field_name'] for entry in target_schema['dataset_fields']
    )
    new_fields = {
        name: cube[name] for name in schema_field_names if name in cube
    }

    # FIXME: This is not atomic. If this API method is called quickly
    #        in parallel, this product ID could no longer be free.
    new_id = local_ckan.action.GetNextProductId(
        parentProductId=parent_id,
        productType=type_code
    )

    # These values must describe the new product, not be inherited from
    # the cube.
    overrides = {
        'type': VALID_DATA_TYPES[type_code],
        'name': new_id,
        'owner_org': 'statcan',
        'product_id_new': new_id,
        'top_parent_id': parent_id,
        'title': product_title,
        'product_type_code': type_code
    }
    new_fields.update(overrides)

    is_type_11 = type_code == '11'

    # Type '11' products get a default content type, but an inherited
    # value (not expected at present) is left untouched.
    if is_type_11 and not new_fields.get('content_type_codes'):
        new_fields['content_type_codes'] = ['2012']

    created = local_ckan.action.package_create(**new_fields)

    # Geodescriptors for data products are not stored on the dataset, as
    # there can be tens of thousands of them. The poor performance of
    # datasets in CKAN means this would cripple normal package creates,
    # updates, and fetches. They go through the geo service instead.
    created_pid = created['product_id_new']
    geo.clear_geodescriptors_for_package(created_pid)

    for code in data_dict.get('geodescriptor_codes') or []:
        geo.update_relationship(created_pid, code)

    # A type '11' product whose id ends in '01' becomes the cube's default
    # view.
    if is_type_11 and new_id.endswith('01'):
        local_ckan.action.UpdateDefaultView(
            cubeId=parent_id,
            defaultView=new_id
        )

    return {'product_id_new': new_id}
Ejemplo n.º 3
0
def register_data_product(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Create a data product of type `productType` as a child of the cube
    identified by `parentProductId`. Any field present in both the
    destination schema and the cube record is copied across before the
    product-specific values are applied.

    .. note::

        The previous docstring claimed the cube is updated when it lacks
        English and French titles. This function contains no such update;
        verify whether ``GetCube`` is responsible for it.

    .. note::

        As the schemas for tables, indicators, charts, and maps do not
        yet exist, this method is not thoroughly tested.

    :param parentProductId: 8-digit cube id
    :type parentProductId: str
    :param productType: product type code, zero-filled to two characters
    :type productType: str
    :param productTitle: EN/FR title dictionary
    :type productTitle: dict
    :param geodescriptor_codes: (optional) geo codes registered for the
        new product via the ``geo`` service
    :type geodescriptor_codes: list of strings
    :return: ``{'product_id_new': <new product id>}``
    :rtype: dict
    :raises _ValidationError: when `productType` is the cube type code or
        is missing from ``VALID_DATA_TYPES``
    """
    # The cube type is handled by RegisterCube, never by this action.
    # TODO: Can we pull this from somewhere? Presets.yaml does not
    #       necessarily have the exact schema name in ndm_product_type.
    cube_code = u'10'

    parent_cube_id = _get_or_bust(data_dict, 'parentProductId')
    new_title = _get_or_bust(data_dict, 'productTitle')
    requested_type = _get_or_bust(data_dict, 'productType').zfill(2)

    if requested_type == cube_code:
        raise _ValidationError(
            ('Please use RegisterCube to register a cube',)
        )
    if requested_type not in VALID_DATA_TYPES:
        raise _ValidationError(
            ('Invalid data productType, only data products may be registered '
             'with this service',)
        )

    api = ckanapi.LocalCKAN(context=context)
    parent_cube = api.action.GetCube(cubeId=parent_cube_id)

    schema = api.action.GetDatasetSchema(
        name=VALID_DATA_TYPES[requested_type]
    )

    # Inherit every schema field that the cube record also defines.
    package_fields = dict(
        (spec['field_name'], parent_cube[spec['field_name']])
        for spec in schema['dataset_fields']
        if spec['field_name'] in parent_cube
    )

    # FIXME: This is not atomic. If this API method is called quickly
    #        in parallel, this product ID could no longer be free.
    assigned_id = api.action.GetNextProductId(
        parentProductId=parent_cube_id,
        productType=requested_type
    )

    # Identity and ownership fields are set explicitly rather than taken
    # from the cube.
    package_fields.update(
        type=VALID_DATA_TYPES[requested_type],
        name=assigned_id,
        owner_org='statcan',
        product_id_new=assigned_id,
        top_parent_id=parent_cube_id,
        title=new_title,
        product_type_code=requested_type
    )

    if requested_type == '11':
        # Never currently set before this point, but an existing value is
        # respected just in case.
        existing_content_types = package_fields.get('content_type_codes')
        if not existing_content_types:
            package_fields['content_type_codes'] = ['2012']

    registered = api.action.package_create(**package_fields)

    # Data-product geodescriptors can number in the tens of thousands, and
    # keeping them on the dataset would cripple normal package creates,
    # updates, and fetches; they are recorded with the geo service.
    registered_pid = registered['product_id_new']
    geo.clear_geodescriptors_for_package(registered_pid)

    requested_codes = data_dict.get('geodescriptor_codes')
    if requested_codes:
        for requested_code in requested_codes:
            geo.update_relationship(registered_pid, requested_code)

    # The '...01' product of type '11' is made the cube's default view.
    if requested_type == '11' and assigned_id.endswith('01'):
        api.action.UpdateDefaultView(
            cubeId=parent_cube_id,
            defaultView=assigned_id
        )

    return {'product_id_new': assigned_id}
Ejemplo n.º 4
0
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Replace the geodescriptors of a product and recompute its geo levels.

    Every supplied code must satisfy ``stcndm_helpers.is_dguid``. The
    product is located with a ``package_search`` on ``product_id_new`` and
    its ``geolevel_codes`` are set to the distinct values returned by
    ``stcndm_helpers.get_geolevel`` for the supplied codes.

    For product types listed in ``VALID_DATA_TYPES`` the codes are written
    through the ``geo`` service (after clearing the ones already recorded).
    For every other type they are stored on the package itself as
    ``geodescriptor_codes``.

    :param productId: ID of the product to update.
    :type productId: str

    :param dguids: Geo codes, either as a list or as a single
        semicolon-separated string.
    :type dguids: list of strings or str

    :return: the package as returned by ``package_show`` after the update
    :rtype: dict

    :raises _ValidationError: if any code is not a dguid, or if no record
        or more than one record matches ``productId``.
    """
    product_id = _get_or_bust(data_dict, 'productId')
    dguids = _get_or_bust(data_dict, 'dguids')

    lc = ckanapi.LocalCKAN(context=context)

    # A plain string is treated as a ';'-delimited list of codes.
    if isinstance(dguids, basestring):
        dguids = [x.strip() for x in dguids.split(';')]

    # Validate everything up front so nothing is modified on bad input.
    for dguid in dguids:
        if not stcndm_helpers.is_dguid(dguid):
            _msg = u'Expecting dguid, got {code} instead'.format(code=dguid)
            raise _ValidationError({u'geodescriptor_codes': _msg})

    response = lc.action.package_search(
        q='product_id_new:{product_id}'.format(
            product_id=product_id
        )
    )

    if response['count'] == 0:
        raise _ValidationError(('Record not found.',))
    elif response['count'] > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    pkg_dict = response['results'][0]
    old_geolevel_codes = pkg_dict.get(u'geolevel_codes', [])
    new_geolevel_codes = list(
        set(stcndm_helpers.get_geolevel(sc) for sc in dguids)
    )
    pkg_dict['geolevel_codes'] = new_geolevel_codes

    is_data_product = pkg_dict['product_type_code'] in VALID_DATA_TYPES

    if is_data_product:
        # Data product geodescriptors (for which there can be tens of
        # thousands) are stored using the geodescriptor service instead of
        # directly on the package.
        geo.clear_geodescriptors_for_package(pkg_dict['product_id_new'])
        for geo_code in dguids:
            geo.update_relationship(pkg_dict['product_id_new'], geo_code)
    else:
        # Non-data products simply have the geodescriptors assigned to the
        # package.
        pkg_dict['geodescriptor_codes'] = dguids

    # The new list comes from a set, so its order is arbitrary; compare
    # without regard to order. Anything that is not a list (e.g. None)
    # counts as changed, as it did with the plain equality test.
    geolevels_unchanged = (
        isinstance(old_geolevel_codes, list) and
        sorted(old_geolevel_codes) == sorted(new_geolevel_codes)
    )

    if is_data_product and geolevels_unchanged:
        # Nothing on the package itself changed; force the re-index of the
        # package so dguids make it into solr.
        query = search.PackageSearchQuery()
        q = {
            'q': 'id:{id}'.format(id=pkg_dict['id']),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')
        }
        pkg_to_index = json.loads(query.run(q)['results'][0]['data_dict'])
        psi = search.PackageSearchIndex()
        psi.index_package(pkg_to_index)
    else:
        # Persist the package. Non-data products must always take this
        # path: their geodescriptor_codes live on the package, so skipping
        # the update would silently discard the new codes whenever the geo
        # levels happen to be unchanged.
        lc.action.package_update(**pkg_dict)

    return lc.action.package_show(id=pkg_dict['id'])