def get_group_dataset_counts():
    '''Return the SOLR facets holding dataset counts for public datasets.

    A single faceted search restricted to ``capacity:public`` is run, with
    facets requested on the ``groups`` and ``owner_org`` fields; the
    resulting ``facets`` attribute of the query object is returned as is.
    '''
    facet_params = {
        'q': '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        # Only the facet counts are of interest, so a single row is fetched.
        'rows': 1,
    }
    searcher = search.PackageSearchQuery()
    searcher.run(facet_params)
    return searcher.facets
def group_list_dictize(obj_list, context,
                       sort_key=lambda x: x['display_name'], reverse=False,
                       with_package_counts=True):
    '''Dictize a list of groups/organizations and return them sorted.

    :param obj_list: group objects, or ``(group, capacity)`` pairs when
        ``context['with_capacity']`` is truthy
    :param context: dict-like; the keys read here are ``active`` (default
        True: skip groups whose state is not 'active' or 'pending'),
        ``include_private_packages`` (default False: only public datasets
        are counted), ``with_capacity`` and ``for_view``
    :param sort_key: key function applied to every group dict when sorting
    :param reverse: sort in descending order when True
    :param with_package_counts: when True, run one faceted search and store
        each group's dataset count under ``packages``
    :returns: the sorted list of group dicts
    '''
    active = context.get('active', True)
    with_private = context.get('include_private_packages', False)

    facets = None
    if with_package_counts:
        query = search.PackageSearchQuery()
        q = {'q': '+capacity:public' if not with_private else '*:*',
             'fl': 'groups',
             'facet.field': ['groups', 'owner_org'],
             'facet.limit': -1,
             'rows': 1}
        query.run(q)
        # The facets do not change between groups, so look them up once.
        facets = query.facets

    result_list = []
    for obj in obj_list:
        if context.get('with_capacity'):
            obj, capacity = obj
            group_dict = d.table_dictize(obj, context, capacity=capacity)
        else:
            group_dict = d.table_dictize(obj, context)
        group_dict.pop('created')
        if active and obj.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = (group_dict.get('title') or
                                      group_dict.get('name'))

        image_url = group_dict.get('image_url')
        group_dict['image_display_url'] = image_url
        if image_url and not image_url.startswith('http'):
            # Munging should not alter a well-formed upload name; it is
            # applied in case dodgy API input ended up in image_url. The
            # munged name is the one that must be put in the URL, otherwise
            # the sanitising has no effect at all.
            image_url = munge.munge_filename(image_url)
            group_dict['image_display_url'] = h.url_for_static(
                'uploads/group/%s' % image_url,
                qualified=True
            )

        if with_package_counts:
            # Organizations are counted by id (owner_org facet), plain
            # groups by name (groups facet).
            if obj.is_organization:
                group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
            else:
                group_dict['packages'] = facets['groups'].get(obj.name, 0)

        if context.get('for_view'):
            if group_dict['is_organization']:
                plugin = plugins.IOrganizationController
            else:
                plugin = plugins.IGroupController
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        result_list.append(group_dict)
    return sorted(result_list, key=sort_key, reverse=reverse)
def tag_dictize(tag: model.Tag, context: Context,
                include_datasets: bool = True) -> dict[str, Any]:
    '''Dictize a tag, optionally together with the public datasets using it.

    :param tag: the tag to dictize
    :param context: read for ``model`` (only when the tag belongs to a
        vocabulary) and ``for_view``
    :param include_datasets: when True, search for public datasets carrying
        the tag (1000 rows are requested) and store them under ``packages``
    :returns: the tag dict, including a ``display_name`` key
    '''
    tag_dict = d.table_dictize(tag, context)
    package_dicts = []

    if include_datasets:
        query = search.PackageSearchQuery()

        vocab_id = tag_dict.get('vocabulary_id')
        if vocab_id:
            # Vocabulary tags are matched on a per-vocabulary field.
            model = context['model']
            vocab = model.Vocabulary.get(vocab_id)
            assert vocab
            tag_clause = u'+vocab_{0}:"{1}"'.format(vocab.name, tag.name)
        else:
            tag_clause = u'+tags:"{0}"'.format(tag.name)

        solr_params: dict[str, Any] = {
            'q': u'+capacity:public ' + tag_clause,
            'fl': 'data_dict',
            'wt': 'json',
            'rows': 1000,
        }
        hits = query.run(solr_params)['results']
        package_dicts = [h.json.loads(hit['data_dict']) for hit in hits]

    # A tag's display_name starts out equal to its name; it may be changed
    # later on (e.g. translated by the multilingual extension).
    assert 'display_name' not in tag_dict
    tag_dict['display_name'] = tag_dict['name']

    if not context.get('for_view'):
        if include_datasets:
            tag_dict['packages'] = package_dicts
        return tag_dict

    # Rendering for a view: give the tag plugins, and then the package
    # plugins, the chance to adjust the dicts.
    for item in plugins.PluginImplementations(plugins.ITagController):
        tag_dict = item.before_view(tag_dict)

    if include_datasets:
        viewed_packages = tag_dict['packages'] = []
        for package_dict in package_dicts:
            for item in plugins.PluginImplementations(
                    plugins.IPackageController):
                package_dict = item.before_dataset_view(package_dict)
            viewed_packages.append(package_dict)

    return tag_dict
def set_resource_metadata(update_dict):
    '''
    Set appropriate datastore_active flag on CKAN resource.

    Called after creation or deletion of DataStore table.

    :param update_dict: must contain ``resource_id``. ``datastore_active``
        and ``datastore_contains_all_records_of_source_file`` default to
        True when absent. The dict is modified in place and all of its
        items are merged into the resource extras.
    '''
    from ckan import model
    # We're modifying the resource extra directly here to avoid a
    # race condition, see issue #3245 for details and plan for a
    # better fix
    update_dict.update({
        'datastore_active': update_dict.get('datastore_active', True),
        'datastore_contains_all_records_of_source_file':
            update_dict.get(
                'datastore_contains_all_records_of_source_file', True)
    })
    resource_id = update_dict['resource_id']

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id
    ).filter(
        model.Resource.id == resource_id
    )
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    if hasattr(model, 'resource_revision_table'):
        # The comparison has to be ``==`` so that SQLAlchemy builds a SQL
        # expression. ``current is True`` is evaluated by Python, yields
        # the constant False and makes the filter match no row.
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == resource_id,
            model.ResourceRevision.current == True  # noqa: E712
        ).update({'extras': extras}, synchronize_session=False)

    model.Session.commit()

    # get package with updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == resource_id:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
def organizations_basic_info():
    """Return the organization tree as nested dicts with dataset counts.

    Every organization dict holds ``id``, ``name``, ``title``, ``depth``,
    ``own_package_count``, ``children``, ``total_package_count``,
    ``available_package_count``, ``active`` and ``display``. The totals of
    a node include the totals of all of its descendants.
    """

    def generate_children_data(group_tree_children, depth):
        # Convert every child (one level deeper) and add up their counts.
        converted_children = [
            convert_organization_to_dict(child, depth + 1)
            for child in group_tree_children
        ]
        return {
            'dict_children': converted_children,
            'current_available_package_count': sum(
                child.get('available_package_count', 0)
                for child in converted_children),
            'current_total_package_count': sum(
                child.get('total_package_count', 0)
                for child in converted_children),
        }

    def convert_organization_to_dict(organization, depth):
        organization_id = organization.pop('id')
        name = organization.pop('name')
        title = organization.pop('title')
        own_package_count = organizations_that_have_packages.pop(
            organization_id, 0)
        own_available_package_count = ckan_organizations_info.pop(
            name, {}).get('count', 0)
        children_data = generate_children_data(
            organization.pop('children'), depth)
        is_active = name == organization_in_request
        return {
            'id': organization_id,
            'name': name,
            'title': title,
            # depth == 0 means the organization is not a child of another
            'depth': depth,
            'own_package_count': own_package_count,
            'children': children_data['dict_children'],
            'total_package_count':
                children_data['current_total_package_count']
                + own_package_count,
            'available_package_count':
                children_data['current_available_package_count']
                + own_available_package_count,
            'active': is_active,
            'display': not organization_in_request or is_active,
        }

    # Fetch the organizations
    organizations = get_organizations_tree()
    ckan_organizations_info = {
        item['name']: item
        for item in ckan_helpers.get_facet_items_dict('organization')
    }

    # Query for the organizations that have datasets, and how many each has
    query = search.PackageSearchQuery()
    query.run({
        'q': '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1,
    })
    organizations_that_have_packages = query.facets.get('owner_org')

    # Turn every organization into a dict that is easier to use, adding the
    # required information
    organization_in_request = ckan_helpers.get_request_param('organization')
    return [
        convert_organization_to_dict(organization, 0)
        for organization in organizations
    ]
def organizations_basic_info():
    """Return the organization tree as nested dicts with dataset counts.

    Every organization dict holds ``id``, ``name``, ``title``, ``depth``,
    ``own_package_count``, ``children`` and ``total_package_count``; the
    total of a node includes the totals of all of its descendants.
    """

    def convert_organization_to_dict(organization, depth):
        organization_id = organization.pop('id')
        name = organization.pop('name')
        title = organization.pop('title')
        own_package_count = organizations_that_have_packages.pop(
            organization_id, 0)
        # Children are converted one level deeper than their parent.
        children = [
            convert_organization_to_dict(child, depth + 1)
            for child in organization.pop('children')
        ]
        descendants_package_count = sum(
            child.get('total_package_count', 0) for child in children)
        return {
            'id': organization_id,
            'name': name,
            'title': title,
            # depth == 0 means the organization is not a child of another
            'depth': depth,
            'own_package_count': own_package_count,
            'children': children,
            'total_package_count':
                descendants_package_count + own_package_count,
        }

    # Fetch the organizations
    group_tree = logic.get_action('group_tree')
    organizations = group_tree({}, {'type': 'organization'})

    # Query for the organizations that have datasets, and how many each has
    query = search.PackageSearchQuery()
    query.run({
        'q': '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1,
    })
    organizations_that_have_packages = query.facets.get('owner_org')

    # Turn every organization into a dict that is easier to use, adding the
    # required information
    return [
        convert_organization_to_dict(organization, 0)
        for organization in organizations
    ]
def group_list_dictize(obj_list, context,
                       sort_key=lambda x: x['display_name'], reverse=False):
    '''Dictize a list of groups/organizations, with their dataset counts.

    :param obj_list: group objects, or ``(group, capacity)`` pairs when
        ``context['with_capacity']`` is truthy
    :param context: dict-like; the keys read here are ``active`` (default
        True: skip groups whose state is not 'active' or 'pending'),
        ``include_private_packages`` (default False: only public datasets
        are counted), ``with_capacity`` and ``for_view``
    :param sort_key: key function applied to every group dict when sorting
    :param reverse: sort in descending order when True
    :returns: the sorted list of group dicts
    '''
    only_active = context.get('active', True)
    include_private = context.get('include_private_packages', False)

    # One faceted search provides the dataset counts of every group.
    counts_query = search.PackageSearchQuery()
    counts_query.run({
        'q': '*:*' if include_private else '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1,
    })

    dictized = []
    for entry in obj_list:
        if context.get('with_capacity'):
            group, capacity = entry
            group_dict = d.table_dictize(group, context, capacity=capacity)
        else:
            group = entry
            group_dict = d.table_dictize(group, context)
        group_dict.pop('created')

        if only_active and group.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = group.display_name

        # Organizations are counted by id (owner_org facet), plain groups
        # by name (groups facet).
        if group.is_organization:
            count = counts_query.facets['owner_org'].get(group.id, 0)
        else:
            count = counts_query.facets['groups'].get(group.name, 0)
        group_dict['packages'] = count

        if context.get('for_view'):
            plugin = (plugins.IOrganizationController
                      if group_dict['is_organization']
                      else plugins.IGroupController)
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        dictized.append(group_dict)

    dictized.sort(key=sort_key, reverse=reverse)
    return dictized
def process_solr(q):
    '''Update the solr index for the packages matching the query ``q``.

    Up to ``BATCH_SIZE`` results of the current site are fetched; each one
    whose ``owner_org`` equals that of ``package_dict`` is re-indexed with a
    deferred commit. ``package_dict``, ``psi`` and ``BATCH_SIZE`` are not
    defined here and are resolved from the surrounding scope.
    '''
    searcher = search.PackageSearchQuery()
    solr_params = {
        'q': q,
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': BATCH_SIZE,
    }
    for hit in searcher.run(solr_params)['results']:
        indexed_dict = json.loads(hit['data_dict'])
        if indexed_dict['owner_org'] != package_dict.get('owner_org'):
            continue
        psi.index_package(indexed_dict, defer_commit=True)
def group_dictize(group, context):
    '''Dictize a group or organization together with its members.

    The result holds the table columns plus ``display_name``, ``extras``,
    ``packages``, ``package_count`` (public datasets only, taken from the
    search index), ``tags``, ``groups`` and ``users``.

    ``context['with_capacity']`` is set to True while the members are
    dictized and is left set to False afterwards.
    '''
    # Not used below, but the lookup is kept: it fails early with a
    # KeyError when the context carries no model.
    model = context['model']

    result_dict = d.table_dictize(group, context)
    result_dict['display_name'] = group.display_name
    result_dict['extras'] = extras_dict_dictize(group._extras, context)

    context['with_capacity'] = True

    package_members = _get_members(context, group, 'packages')
    result_dict['packages'] = d.obj_list_dictize(package_members, context)

    # Organizations own datasets by id, plain groups reference them by name.
    if group.is_organization:
        count_query = 'owner_org:"%s" +capacity:public' % group.id
    else:
        count_query = 'groups:"%s" +capacity:public' % group.name
    searcher = search.PackageSearchQuery()
    result_dict['package_count'] = searcher.run(
        {'q': count_query, 'rows': 1})['count']

    for member_type, dictize_members in (('tags', tag_list_dictize),
                                         ('groups', group_list_dictize),
                                         ('users', user_list_dictize)):
        result_dict[member_type] = dictize_members(
            _get_members(context, group, member_type), context)

    context['with_capacity'] = False

    if context.get('for_view'):
        plugin = (plugins.IOrganizationController
                  if result_dict['is_organization']
                  else plugins.IGroupController)
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    return result_dict
def set_datastore_active_flag(model, data_dict, flag):
    '''
    Set appropriate datastore_active flag on CKAN resource.

    Called after creation or deletion of DataStore table.

    :param model: the model module/object providing ``Session``,
        ``resource_table``, ``Resource``, ``resource_revision_table`` and
        ``ResourceRevision``
    :param data_dict: must contain ``resource_id``
    :param flag: value stored under ``datastore_active`` in the resource
        extras, in the current resource revision and in the search index
    '''
    update_dict = {'datastore_active': flag}
    resource_id = data_dict['resource_id']

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id
    ).filter(
        model.Resource.id == resource_id
    )
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    # The comparison has to be ``==`` so that SQLAlchemy builds a SQL
    # expression. ``current is True`` is evaluated by Python, yields the
    # constant False and makes the filter match no row.
    model.Session.query(model.resource_revision_table).filter(
        model.ResourceRevision.id == resource_id,
        model.ResourceRevision.current == True  # noqa: E712
    ).update({'extras': extras}, synchronize_session=False)

    model.Session.commit()

    # get package with updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == resource_id:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called
    multiple times to initially insert more data, add fields, change the
    aliases or indexes as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same
    time, provide ``resource`` with a valid ``package_id`` and omit the
    ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    submitted to the datapusher and this action returns ``None``.

    See :ref:`fields` and :ref:`records` for details on how to lay out
    records.

    :param resource_id: resource id that the data is going to be stored
        against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional). If it contains a
        ``retention`` value, that value must be an integer from 1-100.
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]
        (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key``
    replaces the existing aliases or constraints. Setting ``records`` appends
    the provided records to the resource.

    **Results:**

    :returns: The newly created data object.
    :rtype: dictionary

    See :ref:`fields` and :ref:`records` for details on how to lay out
    records.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # records and resource are taken out before validation and put back
    # unchanged afterwards.
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    resource_dict = None
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_ts_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        if 'retention' in data_dict['resource']:
            # Only conversion failures are caught here; the range check is
            # done outside of the try block so that nothing else is
            # swallowed.
            try:
                retention = int(data_dict['resource']['retention'])
            except (TypeError, ValueError, OverflowError):
                retention = None
            if retention is None or not 1 <= retention <= 100:
                raise p.toolkit.ValidationError({'resource': [
                    'Retention must be an integer from 1-100']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            if not p.plugin_loaded('datapusher'):
                raise p.toolkit.ValidationError({'resource': [
                    'The datapusher has to be enabled.']})
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': resource_dict['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        else:
            # no need to set the full url because it will be set in before_show
            resource_dict['url_type'] = 'datastore'
            p.toolkit.get_action('resource_update')(context, resource_dict)
    else:
        if not data_dict.pop('force', False):
            resource_id = data_dict['resource_id']
            _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    legacy_mode = 'ckan.datastore.read_url' not in config
    if not legacy_mode and resource.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(unicode(err))

    # Set the datastore_active flag on the resource if necessary
    if resource.extras.get('datastore_active') is not True:
        log.debug(
            'Setting datastore_active=True on resource {0}'.format(resource.id)
        )
        # issue #3245: race condition
        update_dict = {'datastore_active': True}

        # get extras(for entity update) and package_id(for search index update)
        res_query = model.Session.query(
            model.resource_table.c.extras,
            model.resource_table.c.package_id
        ).filter(
            model.Resource.id == data_dict['resource_id']
        )
        extras, package_id = res_query.one()

        # update extras in database for record and its revision
        extras.update(update_dict)
        res_query.update({'extras': extras}, synchronize_session=False)

        # The comparison has to be ``==`` so that SQLAlchemy builds a SQL
        # expression. ``current is True`` is evaluated by Python, yields
        # the constant False and makes the filter match no row.
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == data_dict['resource_id'],
            model.ResourceRevision.current == True  # noqa: E712
        ).update({'extras': extras}, synchronize_session=False)

        model.Session.commit()

        # get package with updated resource from solr
        # find changed resource, patch it and reindex package
        psi = search.PackageSearchIndex()
        solr_query = search.PackageSearchQuery()
        q = {
            'q': 'id:"{0}"'.format(package_id),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': 1
        }
        for record in solr_query.run(q)['results']:
            solr_data_dict = json.loads(record['data_dict'])
            # A separate loop name keeps the model resource fetched above
            # from being overwritten by the indexed resource dicts.
            for indexed_resource in solr_data_dict['resources']:
                if indexed_resource['id'] == data_dict['resource_id']:
                    indexed_resource.update(update_dict)
                    psi.index_package(solr_data_dict)
                    break

    # Internal keys are removed from the result before it is returned.
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url')
    datastore_helpers.remove_autogen(result)
    return result
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Update the specificgeocode_bi_txtm value and set the geo level
    (geolevel_*) accordingly.

    The product is looked up with ``package_search`` on ``product_id_new``.
    Its ``geolevel_codes`` are recomputed from the given dguids. For product
    types in ``VALID_DATA_TYPES`` the dguids are stored through the ``geo``
    service, otherwise they are assigned to the package's
    ``geodescriptor_codes``.

    :param productId: ID of the product to update.
    :type productId: str
    :param dguids: Geo-code values, either as a list or as a single
        ``;``-separated string
    :type dguids: list of strings

    :return: updated package, as returned by ``package_show``
    :rtype: dict

    :raises _ValidationError: if a value is not a dguid, if no product
        matches, or if more than one product matches
    """
    product_id = _get_or_bust(data_dict, 'productId')
    dguids = _get_or_bust(data_dict, 'dguids')

    lc = ckanapi.LocalCKAN(context=context)

    # A single string is accepted too and split on ';'.
    if isinstance(dguids, basestring):
        dguids = [x.strip() for x in dguids.split(';')]

    # Reject the whole request on the first value that is not a dguid.
    for dguid in dguids:
        if not stcndm_helpers.is_dguid(dguid):
            _msg = u'Expecting dguid, got {code} instead'.format(code=dguid)
            raise _ValidationError({u'geodescriptor_codes': _msg})

    response = lc.action.package_search(
        q='product_id_new:{product_id}'.format(
            product_id=product_id
        )
    )

    # Exactly one matching package is required.
    if response['count'] == 0:
        raise _ValidationError(('Record not found.',))
    elif response['count'] > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    pkg_dict = response['results'][0]

    # Remember the previous levels to detect below whether they changed.
    old_geolevel_codes = pkg_dict.get(u'geolevel_codes', [])
    pkg_dict['geolevel_codes'] = list(
        set(stcndm_helpers.get_geolevel(sc) for sc in dguids)
    )

    if pkg_dict['product_type_code'] in VALID_DATA_TYPES:
        # Data product geodescriptors (for which there can be tens of
        # thousands) are stored using the geodescriptor service instead of
        # directly on the package.
        geo.clear_geodescriptors_for_package(pkg_dict['product_id_new'])
        for geo_code in dguids:
            geo.update_relationship(pkg_dict['product_id_new'], geo_code)
    else:
        # Non-data products simply have the geodescriptors assigned to the
        # package.
        pkg_dict['geodescriptor_codes'] = dguids

    # NOTE(review): this compares lists, so it is order-sensitive, while
    # the new value comes from a set -- equal levels in a different order
    # take the package_update branch. Confirm that this is acceptable.
    if old_geolevel_codes == pkg_dict.get(u'geolevel_codes', []):
        # force the re-index of the package so dguids make it into solr
        # NOTE(review): this branch re-indexes the data_dict read back from
        # solr and does not call package_update, so a geodescriptor_codes
        # value assigned to pkg_dict above does not appear to be saved
        # here -- confirm against the intended nesting of this if/else.
        query = search.PackageSearchQuery()
        q = {
            'q': 'id:{id}'.format(id=pkg_dict['id']),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')
        }
        pkg_to_index = json.loads(query.run(q)['results'][0]['data_dict'])
        psi = search.PackageSearchIndex()
        psi.index_package(pkg_to_index)
    else:
        # update the package geolevels
        lc.action.package_update(**pkg_dict)

    return lc.action.package_show(id=pkg_dict['id'])