Example #1
def get_group_dataset_counts():
    '''For all public groups, return their dataset counts, as a SOLR facet'''
    query = search.PackageSearchQuery()
    q = {'q': '+capacity:public',
         'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
         'facet.limit': -1, 'rows': 1}
    query.run(q)
    return query.facets
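A hedged usage sketch for the helper above. The other examples on this page read query.facets as a dict keyed by facet field, where each value maps a group name or organization id to a dataset count; the group names and counts below are illustrative placeholders.

# Usage sketch (illustrative values): read counts out of the returned facets.
facets = get_group_dataset_counts()
group_counts = facets.get('groups', {})    # e.g. {'science': 12, 'health': 3}
org_counts = facets.get('owner_org', {})   # keyed by organization id
science_count = group_counts.get('science', 0)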
Example #2
def group_list_dictize(obj_list, context,
                       sort_key=lambda x:x['display_name'], reverse=False,
                       with_package_counts=True):

    active = context.get('active', True)
    with_private = context.get('include_private_packages', False)

    if with_package_counts:
        query = search.PackageSearchQuery()
        q = {'q': '+capacity:public' if not with_private else '*:*',
             'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
             'facet.limit': -1, 'rows': 1}
        query.run(q)

    result_list = []

    for obj in obj_list:
        if context.get('with_capacity'):
            obj, capacity = obj
            group_dict = d.table_dictize(obj, context, capacity=capacity)
        else:
            group_dict = d.table_dictize(obj, context)
        group_dict.pop('created')
        if active and obj.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = (group_dict.get('title') or
                                      group_dict.get('name'))

        image_url = group_dict.get('image_url')
        group_dict['image_display_url'] = image_url
        if image_url and not image_url.startswith('http'):
            # munge here should not have an effect; it is only done in case
            # of a potential vulnerability from dodgy API input
            image_url = munge.munge_filename(image_url)
            group_dict['image_display_url'] = h.url_for_static(
                'uploads/group/%s' % image_url,
                qualified=True
            )

        if with_package_counts:
            facets = query.facets
            if obj.is_organization:
                group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
            else:
                group_dict['packages'] = facets['groups'].get(obj.name, 0)

        if context.get('for_view'):
            if group_dict['is_organization']:
                plugin = plugins.IOrganizationController
            else:
                plugin = plugins.IGroupController
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        result_list.append(group_dict)
    return sorted(result_list, key=sort_key, reverse=reverse)
Example #3
def tag_dictize(tag: model.Tag,
                context: Context,
                include_datasets: bool = True) -> dict[str, Any]:
    tag_dict = d.table_dictize(tag, context)
    package_dicts = []
    if include_datasets:
        query = search.PackageSearchQuery()

        tag_query = u'+capacity:public '
        vocab_id = tag_dict.get('vocabulary_id')

        if vocab_id:
            model = context['model']
            vocab = model.Vocabulary.get(vocab_id)
            assert vocab
            tag_query += u'+vocab_{0}:"{1}"'.format(vocab.name, tag.name)
        else:
            tag_query += u'+tags:"{0}"'.format(tag.name)

        q: dict[str, Any] = {
            'q': tag_query,
            'fl': 'data_dict',
            'wt': 'json',
            'rows': 1000
        }

        package_dicts = [
            h.json.loads(result['data_dict'])
            for result in query.run(q)['results']
        ]

    # Add display_names to tags. At first a tag's display_name is just the
    # same as its name, but the display_name might get changed later (e.g.
    # translated into another language by the multilingual extension).
    assert 'display_name' not in tag_dict
    tag_dict['display_name'] = tag_dict['name']

    if context.get('for_view'):
        for item in plugins.PluginImplementations(plugins.ITagController):
            tag_dict = item.before_view(tag_dict)

        if include_datasets:
            tag_dict['packages'] = []
            for package_dict in package_dicts:
                for item in plugins.PluginImplementations(
                        plugins.IPackageController):
                    package_dict = item.before_dataset_view(package_dict)
                tag_dict['packages'].append(package_dict)
    else:
        if include_datasets:
            tag_dict['packages'] = package_dicts

    return tag_dict
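The part of tag_dictize worth reusing is how it asks Solr only for the stored data_dict field and decodes it, rather than dictizing each package from the database again. A minimal, hedged sketch of that pattern on its own, for free (non-vocabulary) tags; the tag name is a placeholder and the stdlib json module stands in for h.json.

import json

from ckan.lib import search


def datasets_for_tag(tag_name, rows=1000):
    # Fetch only the cached 'data_dict' field for public datasets with the tag,
    # then decode each JSON blob into a package dict.
    query = search.PackageSearchQuery()
    q = {
        'q': u'+capacity:public +tags:"{0}"'.format(tag_name),
        'fl': 'data_dict',
        'wt': 'json',
        'rows': rows,
    }
    return [json.loads(r['data_dict']) for r in query.run(q)['results']]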
Example #4
def set_resource_metadata(update_dict):
    '''
    Set appropriate datastore_active flag on CKAN resource.

    Called after creation or deletion of DataStore table.
    '''
    from ckan import model
    # We're modifying the resource extra directly here to avoid a
    # race condition, see issue #3245 for details and plan for a
    # better fix
    update_dict.update({
        'datastore_active': update_dict.get('datastore_active', True),
        'datastore_contains_all_records_of_source_file':
        update_dict.get('datastore_contains_all_records_of_source_file', True)
    })

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id
    ).filter(
        model.Resource.id == update_dict['resource_id']
    )
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    if hasattr(model, 'resource_revision_table'):
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == update_dict['resource_id'],
            model.ResourceRevision.current.is_(True)  # '.is_(True)' builds SQL; 'is True' evaluated to a plain Python bool
        ).update({'extras': extras}, synchronize_session=False)
    model.Session.commit()

    # get package with updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == update_dict['resource_id']:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
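Several examples on this page repeat the same "patch the stored data_dict and reindex" step. A hedged sketch of that step pulled out as a helper, assuming the same PackageSearchQuery / PackageSearchIndex classes used above and a config mapping that provides ckan.site_id; the helper itself is not part of CKAN.

import json

from ckan.lib import search


def reindex_package_with_patched_resource(package_id, resource_id, patch, config):
    # Fetch the package's indexed data_dict from Solr, update the matching
    # resource in place, and push the patched dict back into the index.
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1,
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == resource_id:
                resource.update(patch)
                psi.index_package(solr_data_dict)
                break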
Example #5
def organizations_basic_info():
    def convert_organization_to_dict(organization, depth):
        current_organization = {}
        organization_id = organization.pop('id')
        current_organization['id'] = organization_id
        current_organization['name'] = organization.pop('name')
        current_organization['title'] = organization.pop('title')
        current_organization['depth'] = depth  # if depth == 0, the organization is not a child of another
        current_organization['own_package_count'] = organizations_that_have_packages.pop(organization_id, 0)
        own_available_package_count = ckan_organizations_info.pop(current_organization['name'], {}).get('count', 0)
        children_data_dict = generate_children_data(organization.pop('children'), depth)
        current_organization['children'] = children_data_dict['dict_children']
        current_organization['total_package_count'] = children_data_dict['current_total_package_count'] \
                                                      + current_organization['own_package_count']
        current_organization['available_package_count'] = children_data_dict['current_available_package_count'] + \
                                                          own_available_package_count
        current_organization['active'] = current_organization['name'] == organization_in_request
        current_organization['display'] = not organization_in_request or current_organization['active']
        return current_organization

    def generate_children_data(group_tree_children, depth):
        dict_children = []
        current_available_package_count = 0
        current_total_package_count = 0
        for child in group_tree_children:
            converted_child = convert_organization_to_dict(child, depth + 1)
            dict_children.append(converted_child)
            current_available_package_count += converted_child.get('available_package_count', 0)
            current_total_package_count += converted_child.get('total_package_count', 0)
        return {'dict_children': dict_children, 'current_available_package_count': current_available_package_count,
                'current_total_package_count': current_total_package_count}

    # Fetch the organizations
    organizations = get_organizations_tree()
    ckan_organizations_info = {item['name']: item for item in ckan_helpers.get_facet_items_dict('organization')}

    # Run a single query to find which organizations have datasets, and how many each one has
    query = search.PackageSearchQuery()
    q = {'q': '+capacity:public', 'fl': 'groups', 'facet.field': ['groups', 'owner_org'], 'facet.limit': -1, 'rows': 1}
    query.run(q)
    organizations_that_have_packages = query.facets.get('owner_org', {})

    # Turn each organization into a dict for easier use, and add the required information
    organizations_data = []
    organization_in_request = ckan_helpers.get_request_param('organization')
    for organization in organizations:
        current_organization = convert_organization_to_dict(organization, 0)
        organizations_data.append(current_organization)

    return organizations_data
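For orientation, each entry appended to organizations_data above has roughly this shape; the keys come from the code, the values are illustrative only.

example_organization = {
    'id': 'org-uuid',
    'name': 'ministry-of-data',
    'title': 'Ministry of Data',
    'depth': 0,                    # 0 means the organization has no parent
    'own_package_count': 4,        # datasets owned directly, from the Solr facet
    'available_package_count': 6,  # this organization's facet count plus its children's
    'total_package_count': 10,     # own_package_count plus all children's totals
    'children': [],                # nested dicts with the same shape
    'active': False,               # True when the 'organization' request param matches
    'display': True,               # shown unless another organization is selected
}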
Example #6
def organizations_basic_info():
    def convert_organization_to_dict(organization, depth):
        current_organization = {}
        organization_id = organization.pop('id')
        current_organization['id'] = organization_id
        current_organization['name'] = organization.pop('name')
        current_organization['title'] = organization.pop('title')
        current_organization[
            'depth'] = depth  # if depth == 0, the organization is not a child of another
        current_organization[
            'own_package_count'] = organizations_that_have_packages.pop(
                organization_id, 0)
        total_package_count = 0
        group_tree_children = organization.pop('children')
        dict_children = []
        for child in group_tree_children:
            converted_child = convert_organization_to_dict(child, depth + 1)
            dict_children.append(converted_child)
            total_package_count += converted_child.get('total_package_count',
                                                       0)
        current_organization['children'] = dict_children
        current_organization[
            'total_package_count'] = total_package_count + current_organization[
                'own_package_count']
        return current_organization

    # Fetch the organizations
    organizations = logic.get_action('group_tree')({}, {
        'type': 'organization'
    })

    # Run a single query to find which organizations have datasets, and how many each one has
    query = search.PackageSearchQuery()
    q = {
        'q': '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1
    }
    query.run(q)
    organizations_that_have_packages = query.facets.get('owner_org', {})

    # Turn each organization into a dict for easier use, and add the required information
    organizations_data = []
    for organization in organizations:
        current_organization = convert_organization_to_dict(organization, 0)
        organizations_data.append(current_organization)

    return organizations_data
Example #7
def group_list_dictize(obj_list,
                       context,
                       sort_key=lambda x: x['display_name'],
                       reverse=False):

    active = context.get('active', True)
    with_private = context.get('include_private_packages', False)

    query = search.PackageSearchQuery()

    q = {
        'q': '+capacity:public' if not with_private else '*:*',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1
    }

    query.run(q)

    result_list = []

    for obj in obj_list:
        if context.get('with_capacity'):
            obj, capacity = obj
            group_dict = d.table_dictize(obj, context, capacity=capacity)
        else:
            group_dict = d.table_dictize(obj, context)
        group_dict.pop('created')
        if active and obj.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = obj.display_name

        if obj.is_organization:
            group_dict['packages'] = query.facets['owner_org'].get(obj.id, 0)
        else:
            group_dict['packages'] = query.facets['groups'].get(obj.name, 0)

        if context.get('for_view'):
            if group_dict['is_organization']:
                plugin = plugins.IOrganizationController
            else:
                plugin = plugins.IGroupController
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        result_list.append(group_dict)
    return sorted(result_list, key=sort_key, reverse=reverse)
Example #8
            def process_solr(q):
                # update the solr index for the query
                query = search.PackageSearchQuery()
                q = {
                    'q': q,
                    'fl': 'data_dict',
                    'wt': 'json',
                    'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
                    'rows': BATCH_SIZE
                }

                for result in query.run(q)['results']:
                    data_dict = json.loads(result['data_dict'])
                    if data_dict['owner_org'] == package_dict.get('owner_org'):
                        psi.index_package(data_dict, defer_commit=True)
Example #9
def group_dictize(group, context):
    model = context['model']
    result_dict = d.table_dictize(group, context)

    result_dict['display_name'] = group.display_name

    result_dict['extras'] = extras_dict_dictize(
        group._extras, context)

    context['with_capacity'] = True

    result_dict['packages'] = d.obj_list_dictize(
        _get_members(context, group, 'packages'),
        context)

    query = search.PackageSearchQuery()
    if group.is_organization:
        q = {'q': 'owner_org:"%s" +capacity:public' % group.id, 'rows': 1}
    else:
        q = {'q': 'groups:"%s" +capacity:public' % group.name, 'rows': 1}
    result_dict['package_count'] = query.run(q)['count']

    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context)

    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    return result_dict
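The package_count lookup above is a count-only query: 'rows' stays at 1 and only the total 'count' is read from the response. A hedged sketch of that pattern on its own, assuming a group or organization object with id, name and is_organization attributes like the one used above.

from ckan.lib import search


def public_package_count(group):
    # Count-only query: we never look at the result documents, only the total.
    query = search.PackageSearchQuery()
    if group.is_organization:
        q = {'q': 'owner_org:"%s" +capacity:public' % group.id, 'rows': 1}
    else:
        q = {'q': 'groups:"%s" +capacity:public' % group.name, 'rows': 1}
    return query.run(q)['count']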
Example #10
def set_datastore_active_flag(model, data_dict, flag):
    '''
    Set appropriate datastore_active flag on CKAN resource.

    Called after creation or deletion of DataStore table.
    '''
    update_dict = {'datastore_active': flag}

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id).filter(
            model.Resource.id == data_dict['resource_id'])
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    model.Session.query(model.resource_revision_table).filter(
        model.ResourceRevision.id == data_dict['resource_id'],
        model.ResourceRevision.current.is_(True)).update(
            {'extras': extras}, synchronize_session=False)

    model.Session.commit()

    # get package with  updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == data_dict['resource_id']:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
Example #11
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes, and bulk insertion. It can be called multiple times
    to insert more data, add fields, or change the aliases, indexes or
    primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing
    aliases or constraints. Setting ``records`` appends the provided records to the resource.

    **Results:**

    :returns: The newly created data object.
    :rtype: dictionary

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    resource_dict = None
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_ts_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        if 'retention' in data_dict['resource']:
            try:
                retention = int(data_dict['resource']['retention'])
            except (TypeError, ValueError):
                retention = None
            if retention is None or not 1 <= retention <= 100:
                raise p.toolkit.ValidationError({'resource': [
                    'Retention must be an integer from 1-100']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            if not p.plugin_loaded('datapusher'):
                raise p.toolkit.ValidationError({'resource': [
                    'The datapusher has to be enabled.']})
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': resource_dict['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        else:
            # no need to set the full url because it will be set in before_show
            resource_dict['url_type'] = 'datastore'
            p.toolkit.get_action('resource_update')(context, resource_dict)
    else:
        if not data_dict.pop('force', False):
            resource_id = data_dict['resource_id']
            _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    legacy_mode = 'ckan.datastore.read_url' not in config
    if not legacy_mode and resource.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(unicode(err))

    # Set the datastore_active flag on the resource if necessary
    if resource.extras.get('datastore_active') is not True:
        log.debug(
            'Setting datastore_active=True on resource {0}'.format(resource.id)
        )
        # issue #3245: race condition
        update_dict = {'datastore_active': True}

        # get extras(for entity update) and package_id(for search index update)
        res_query = model.Session.query(
            model.resource_table.c.extras,
            model.resource_table.c.package_id
        ).filter(
            model.Resource.id == data_dict['resource_id']
        )
        extras, package_id = res_query.one()

        # update extras in database for record and its revision
        extras.update(update_dict)
        res_query.update({'extras': extras}, synchronize_session=False)

        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == data_dict['resource_id'],
            model.ResourceRevision.current.is_(True)
        ).update({'extras': extras}, synchronize_session=False)

        model.Session.commit()

        # get package with  updated resource from solr
        # find changed resource, patch it and reindex package
        psi = search.PackageSearchIndex()
        solr_query = search.PackageSearchQuery()
        q = {
            'q': 'id:"{0}"'.format(package_id),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': 1
        }
        for record in solr_query.run(q)['results']:
            solr_data_dict = json.loads(record['data_dict'])
            for resource in solr_data_dict['resources']:
                if resource['id'] == data_dict['resource_id']:
                    resource.update(update_dict)
                    psi.index_package(solr_data_dict)
                    break

    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url')
    
    datastore_helpers.remove_autogen(result)

    return result
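A hedged usage sketch for the action documented above, assuming it is registered under the datastore_create name and invoked through the plugins toolkit; the dataset id, field names and records are illustrative placeholders.

import ckan.plugins.toolkit as toolkit

# Create a DataStore-only resource on an existing dataset and insert records.
# 'my-dataset-id' stands in for a real package id.
create_dict = {
    'resource': {'package_id': 'my-dataset-id', 'name': 'Air quality readings'},
    'fields': [
        {'id': 'dob', 'type': 'text'},
        {'id': 'some_stuff', 'type': 'json'},
    ],
    'records': [{'dob': '2005', 'some_stuff': ['a', 'b']}],
    'primary_key': 'dob',
}
result = toolkit.get_action('datastore_create')({}, create_dict)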
Example #12
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Update the specificgeocode_bi_txtm value and sets the geo level
    (geolevel_*) accordingly.

    :param productId: ID of the product to update.
    :type productId: str

    :param dguids: list of geo-code (DGUID) values
    :type dguids: list of strings

    :return: updated package
    :rtype: dict
    """
    product_id = _get_or_bust(data_dict, 'productId')
    dguids = _get_or_bust(data_dict, 'dguids')

    lc = ckanapi.LocalCKAN(context=context)

    if isinstance(dguids, basestring):
        dguids = [x.strip() for x in dguids.split(';')]

    for dguid in dguids:
        if not stcndm_helpers.is_dguid(dguid):
            _msg = u'Expecting dguid, got {code} instead'.format(code=dguid)
            raise _ValidationError({u'geodescriptor_codes': _msg})

    response = lc.action.package_search(
        q='product_id_new:{product_id}'.format(
            product_id=product_id
        )
    )

    if response['count'] == 0:
        raise _ValidationError(('Record not found.',))
    elif response['count'] > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    pkg_dict = response['results'][0]
    old_geolevel_codes = pkg_dict.get(u'geolevel_codes', [])
    pkg_dict['geolevel_codes'] = list(
        set(stcndm_helpers.get_geolevel(sc) for sc in dguids)
    )

    if pkg_dict['product_type_code'] in VALID_DATA_TYPES:
        # Data product geodescriptors (for which there can be tens of
        # thousands) are stored using the geodescriptor service instead of
        # directly on the package.
        geo.clear_geodescriptors_for_package(pkg_dict['product_id_new'])
        for geo_code in dguids:
            geo.update_relationship(pkg_dict['product_id_new'], geo_code)
    else:
        # Non-data products simply have the geodescriptors assigned to the
        # package.
        pkg_dict['geodescriptor_codes'] = dguids

    if old_geolevel_codes == pkg_dict.get(u'geolevel_codes', []):
        # force the re-index of the package so dguids make it into solr
        query = search.PackageSearchQuery()
        q = {
            'q': 'id:{id}'.format(id=pkg_dict['id']),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')
        }
        pkg_to_index = json.loads(query.run(q)['results'][0]['data_dict'])
        psi = search.PackageSearchIndex()
        psi.index_package(pkg_to_index)
    else:
        # update the package geolevels
        lc.action.package_update(**pkg_dict)

    return lc.action.package_show(id=pkg_dict['id'])