def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Replace the geodescriptors of a product and recompute its
    ``geolevel_codes``.

    The product is looked up with a ``package_search`` on
    ``product_id_new``. Its ``geolevel_codes`` become the distinct
    five-character prefixes of the supplied dguids. For product types
    listed in ``VALID_DATA_TYPES`` the dguids are written through the
    ``geo`` service (after clearing the existing ones); for every other
    product type they are stored on the package as
    ``geodescriptor_codes``. The package is then saved with
    ``package_update``.

    NOTE(review): another function named ``update_product_geo`` is defined
    later in this file; if both live in the same module the later
    definition replaces this one at import time -- confirm which is
    intended.

    :param productId: ID of the product to update.
    :type productId: str
    :param dguids: Geo-code values, either a list or a single
        semicolon-separated string.
    :type dguids: list of strings or str

    :return: updated package, as returned by ``package_show``
    :rtype: dict

    :raises: ``_ValidationError`` when the search matches no package or
        more than one package.
    """
    product_id = _get_or_bust(data_dict, 'productId')
    geo_codes = _get_or_bust(data_dict, 'dguids')

    ckan = ckanapi.LocalCKAN(context=context)

    # Accept "a; b; c" as a convenience form of ['a', 'b', 'c'].
    if isinstance(geo_codes, basestring):
        geo_codes = [part.strip() for part in geo_codes.split(';')]

    search_result = ckan.action.package_search(
        q='product_id_new:{product_id}'.format(product_id=product_id)
    )

    # Exactly one package must match the product ID.
    if search_result['count'] == 0:
        raise _ValidationError(('Record not found.',))
    if search_result['count'] > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    package = search_result['results'][0]

    # The geo level is taken from the first five characters of each code.
    package['geolevel_codes'] = list({code[:5] for code in geo_codes})

    if package['product_type_code'] not in VALID_DATA_TYPES:
        # Non-data products simply carry their geodescriptors on the
        # package itself.
        package['geodescriptor_codes'] = geo_codes
    else:
        # Data products can have tens of thousands of geodescriptors, so
        # they are kept in the geodescriptor service rather than on the
        # package.
        target_id = package['product_id_new']
        geo.clear_geodescriptors_for_package(target_id)
        for code in geo_codes:
            geo.update_relationship(package['product_id_new'], code)

    # TODO: Check the results?
    ckan.action.package_update(**package)

    return ckan.action.package_show(id=package['id'])
def register_data_product(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Register a new data product under an existing cube.

    The cube identified by ``parentProductId`` is fetched with ``GetCube``
    and every field that also exists in the target product type's dataset
    schema is copied to the new product. Identity fields (``type``,
    ``name``, ``owner_org``, ``product_id_new``, ``top_parent_id``,
    ``title``, ``product_type_code``) are then overwritten. The new
    product ID comes from ``GetNextProductId``.

    When the product type is ``'11'`` and no ``content_type_codes`` value
    was inherited, it defaults to ``['2012']``. When a type ``'11'``
    product ID ends in ``'01'``, ``UpdateDefaultView`` is called for the
    cube with the new product as its default view.

    .. note::

        NOTE(review): earlier documentation stated that a cube missing
        English and French titles will be updated. Nothing in this
        function does so directly; presumably ``GetCube`` handles it --
        confirm.

    .. note::

        As the schemas for tables, indicators, charts, and maps do not
        yet exist, this method is not thoroughly tested.

    :param parentProductId: 8-digit cube id
    :type parentProductId: str
    :param productType: 2-digit product type code (left-padded with
        zeros to two characters)
    :type productType: str
    :param productTitle: EN/FR title dictionary
    :type productTitle: dict
    :param geodescriptor_codes: optional geo codes to attach to the new
        product through the ``geo`` service
    :type geodescriptor_codes: list of strings

    :return: ``{'product_id_new': <newly-registered product id>}``
    :rtype: dict

    :raises: ``_ValidationError`` when the product type is the cube type
        or is not one of ``VALID_DATA_TYPES``.
    """
    # Cubes are registered through their own service, never through
    # this one.
    # TODO: Can we pull this from somewhere? Presets.yaml does not
    #       necessarily have the exact schema name in ndm_product_type.
    CUBE_PRODUCT_TYPE = u'10'

    cube_id = _get_or_bust(data_dict, 'parentProductId')
    title = _get_or_bust(data_dict, 'productTitle')
    product_type = _get_or_bust(data_dict, 'productType').zfill(2)

    if product_type == CUBE_PRODUCT_TYPE:
        raise _ValidationError(
            ('Please use RegisterCube to register a cube',)
        )
    if product_type not in VALID_DATA_TYPES:
        raise _ValidationError(
            ('Invalid data productType, only data products may be registered '
             'with this service',)
        )

    schema_name = VALID_DATA_TYPES[product_type]

    ckan = ckanapi.LocalCKAN(context=context)
    cube = ckan.action.GetCube(cubeId=cube_id)
    schema = ckan.action.GetDatasetSchema(name=schema_name)

    # Inherit every field the cube and the destination type have in
    # common.
    schema_field_names = (
        field['field_name'] for field in schema['dataset_fields']
    )
    new_fields = {
        field_name: cube[field_name]
        for field_name in schema_field_names
        if field_name in cube
    }

    # FIXME: This is not atomic. If this API method is called quickly
    #        in parallel, this product ID could no longer be free.
    product_id = ckan.action.GetNextProductId(
        parentProductId=cube_id,
        productType=product_type
    )

    # Identity fields must never be inherited from the cube.
    new_fields.update({
        'type': schema_name,
        'name': product_id,
        'owner_org': 'statcan',
        'product_id_new': product_id,
        'top_parent_id': cube_id,
        'title': title,
        'product_type_code': product_type
    })

    # Never currently set before this point, but an inherited value is
    # left alone just in case.
    if product_type == '11' and not new_fields.get('content_type_codes'):
        new_fields['content_type_codes'] = ['2012']

    # Geodescriptors for data products are not stored on the dataset, as
    # there can be tens of thousands of them. The poor performance of
    # datasets in CKAN means this would cripple normal package creates,
    # updates, and fetches.
    created = ckan.action.package_create(**new_fields)
    geo.clear_geodescriptors_for_package(created['product_id_new'])

    for geo_code in data_dict.get('geodescriptor_codes') or []:
        geo.update_relationship(created['product_id_new'], geo_code)

    if product_type == '11' and product_id.endswith('01'):
        ckan.action.UpdateDefaultView(cubeId=cube_id, defaultView=product_id)

    return {'product_id_new': product_id}
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Replace the geodescriptors (dguids) of a product and refresh its
    ``geolevel_codes``.

    Every supplied value is validated with ``stcndm_helpers.is_dguid``
    before anything is changed. The product is looked up with a
    ``package_search`` on ``product_id_new``; its ``geolevel_codes``
    become the distinct results of ``stcndm_helpers.get_geolevel`` over
    the dguids.

    For product types in ``VALID_DATA_TYPES`` the dguids are written to
    the ``geo`` service; if the set of geo levels did not change, the
    package is only re-indexed instead of being fully updated. For all
    other product types the dguids are stored on the package as
    ``geodescriptor_codes`` and the package is always saved with
    ``package_update``, so that the new codes are persisted.

    :param productId: ID of the product to update.
    :type productId: str
    :param dguids: Geo-code values, either a list or a single
        semicolon-separated string.
    :type dguids: list of strings or str

    :return: updated package, as returned by ``package_show``
    :rtype: dict

    :raises: ``_ValidationError`` when a value is not a dguid, or when the
        search matches no package or more than one package.
    """
    product_id = _get_or_bust(data_dict, 'productId')
    dguids = _get_or_bust(data_dict, 'dguids')

    lc = ckanapi.LocalCKAN(context=context)

    # Accept "a; b; c" as a convenience form of ['a', 'b', 'c'].
    if isinstance(dguids, basestring):
        dguids = [x.strip() for x in dguids.split(';')]

    # Validate everything up front so a bad code cannot leave the product
    # half-updated.
    for dguid in dguids:
        if not stcndm_helpers.is_dguid(dguid):
            _msg = u'Expecting dguid, got {code} instead'.format(code=dguid)
            raise _ValidationError({u'geodescriptor_codes': _msg})

    response = lc.action.package_search(
        q='product_id_new:{product_id}'.format(
            product_id=product_id
        )
    )

    if response['count'] == 0:
        raise _ValidationError(('Record not found.',))
    elif response['count'] > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    pkg_dict = response['results'][0]

    # ``or []`` also covers a stored value of None.
    old_geolevel_codes = pkg_dict.get(u'geolevel_codes') or []
    new_geolevel_codes = list(
        set(stcndm_helpers.get_geolevel(sc) for sc in dguids)
    )
    pkg_dict['geolevel_codes'] = new_geolevel_codes

    is_data_product = pkg_dict['product_type_code'] in VALID_DATA_TYPES

    if is_data_product:
        # Data product geodescriptors (for which there can be tens of
        # thousands) are stored using the geodescriptor service instead of
        # directly on the package.
        geo.clear_geodescriptors_for_package(pkg_dict['product_id_new'])
        for geo_code in dguids:
            geo.update_relationship(pkg_dict['product_id_new'], geo_code)
    else:
        # Non-data products simply have the geodescriptors assigned to the
        # package.
        pkg_dict['geodescriptor_codes'] = dguids

    # Compare as sets: the new list is built from a set, so its order is
    # arbitrary and a list comparison could report a spurious change.
    geolevels_unchanged = (
        set(old_geolevel_codes) == set(new_geolevel_codes)
    )

    if is_data_product and geolevels_unchanged:
        # Nothing on the package itself changed, so skip the expensive
        # package_update and just force a re-index of the package so the
        # dguids make it into solr. This shortcut is only valid for data
        # products: a non-data product carries its geodescriptor_codes on
        # the package, and re-indexing the stored copy would discard them.
        query = search.PackageSearchQuery()
        q = {
            'q': 'id:{id}'.format(id=pkg_dict['id']),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')
        }
        pkg_to_index = json.loads(query.run(q)['results'][0]['data_dict'])
        psi = search.PackageSearchIndex()
        psi.index_package(pkg_to_index)
    else:
        # Persist the new geolevels (and, for non-data products, the new
        # geodescriptor_codes).
        lc.action.package_update(**pkg_dict)

    return lc.action.package_show(id=pkg_dict['id'])