예제 #1
0
def _bulk_update_dataset(context, data_dict, update_dict):
    '''Apply ``update_dict`` to many datasets of one organization at once.

    Shared code for the organization bulk-update actions.  The packages
    listed in ``data_dict['datasets']`` whose ``owner_org`` equals
    ``data_dict['org_id']`` are updated directly in the database, then
    their documents stored in the search index are patched with the same
    values and re-indexed in batches.

    :param context: action context; ``context['model']`` is used for
        database access
    :param data_dict: dictionary with ``datasets`` (iterable of package
        ids, defaults to an empty list) and ``org_id``
    :param update_dict: values handed to the SQLAlchemy bulk ``update()``
        and merged into each indexed package dictionary
    '''
    datasets = data_dict.get('datasets', [])
    org_id = data_dict.get('org_id')

    model = context['model']

    # A single UPDATE is sufficient.  The original code issued this exact
    # statement a second time under a "revisions" comment, which touched
    # the same rows with the same values and never reached any revision
    # table.
    model.Session.query(model.package_table) \
        .filter(model.Package.id.in_(datasets)) \
        .filter(model.Package.owner_org == org_id) \
        .update(update_dict, synchronize_session=False)

    model.Session.commit()

    # solr update here
    psi = search.PackageSearchIndex()

    # update the solr index in batches
    BATCH_SIZE = 50

    def process_solr(solr_q):
        # Fetch the stored package dicts matching ``solr_q``, patch the
        # ones owned by the organization and queue them for re-indexing.
        query = search.PackageSearchQuery()
        params = {
            'q': solr_q,
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': BATCH_SIZE
        }

        for result in query.run(params)['results']:
            pkg_dict = json.loads(result['data_dict'])
            # Same ownership guard as the database UPDATE above.
            if pkg_dict['owner_org'] == org_id:
                pkg_dict.update(update_dict)
                psi.index_package(pkg_dict, defer_commit=True)

    pending = []
    for dataset_id in datasets:
        pending.append('id:"%s"' % (dataset_id))
        if len(pending) == BATCH_SIZE:
            process_solr(' OR '.join(pending))
            pending = []
    # flush the last, partially filled batch
    if pending:
        process_solr(' OR '.join(pending))
    # finally commit the changes
    psi.commit()
예제 #2
0
def set_resource_metadata(update_dict):
    '''
    Set appropriate datastore flags on a CKAN resource and re-index it.

    Called after creation or deletion of DataStore table.

    ``update_dict`` is modified in place: ``datastore_active`` and
    ``datastore_contains_all_records_of_source_file`` default to ``True``
    when absent.  The whole dictionary (including ``resource_id``) is then
    merged into the resource's ``extras`` in the database and into the
    matching resource of the package document stored in the search index.

    :param update_dict: resource metadata to store; must contain
        ``resource_id``
    :type update_dict: dictionary
    :raises KeyError: if ``update_dict`` has no ``resource_id``
    '''
    from ckan import model
    # We're modifying the resource extra directly here to avoid a
    # race condition, see issue #3245 for details and plan for a
    # better fix
    update_dict.setdefault('datastore_active', True)
    update_dict.setdefault(
        'datastore_contains_all_records_of_source_file', True)

    resource_id = update_dict['resource_id']

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id
    ).filter(
        model.Resource.id == resource_id
    )
    # Query.one() raises unless exactly one row matches.
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    if hasattr(model, 'resource_revision_table'):
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == resource_id,
            # ``current is True`` would be evaluated by Python (always
            # False for a column attribute) instead of producing SQL;
            # ``.is_(True)`` builds the intended ``current IS true``.
            model.ResourceRevision.current.is_(True)
        ).update({'extras': extras}, synchronize_session=False)
    model.Session.commit()

    # get package with updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == resource_id:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
예제 #3
0
    def setup_class(cls):
        """Prepare the shared fixtures used by the index tests.

        Stores a Solr connection, the ``site_id`` filter-query fragment, a
        package search index and a minimal package dictionary on the class.
        """
        cls.solr_client = search.make_connection()

        site_id = config["ckan.site_id"]
        cls.fq = ' +site_id:"%s" ' % site_id

        cls.package_index = search.PackageSearchIndex()

        # Two separate clock reads, one per timestamp field.
        created_at = datetime.datetime.now().isoformat()
        modified_at = datetime.datetime.now().isoformat()

        package = {
            "id": "test-index",
            "name": "monkey",
            "title": "Monkey",
            "state": "active",
            "private": False,
            "type": "dataset",
            "owner_org": None,
        }
        package["metadata_created"] = created_at
        package["metadata_modified"] = modified_at
        cls.base_package_dict = package
예제 #4
0
def update_solr_package_indexes(package_dict):
    '''Re-index the search-index document of a single package.

    The document is looked up by ``package_dict['id']`` (restricted to this
    site's ``site_id``); every returned document whose ``owner_org`` equals
    ``package_dict['owner_org']`` is re-indexed from its stored
    ``data_dict`` and the index is committed once at the end.

    :param package_dict: package dictionary providing ``id`` and
        ``owner_org``; when falsy only a warning is logged
    '''
    if not package_dict:
        log.warning(
            "::: package_dict is None: SOLR INDEX CANNOT BE UPDATED! :::")
        return

    log.debug("::: UPDATING SOLR INDEX :::")

    psi = search.PackageSearchIndex()
    query = search.PackageSearchQuery()

    # The original wrapped this in batching scaffolding copied from a bulk
    # update, but only one id is ever queried, so the "full batch" branch
    # could never run.  The request itself is unchanged (rows stays 50).
    q = {
        'q': 'id:"%s"' % (package_dict.get('id')),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 50
    }

    for result in query.run(q)['results']:
        data_dict = json.loads(result['data_dict'])
        if data_dict['owner_org'] == package_dict.get('owner_org'):
            psi.index_package(data_dict, defer_commit=True)

    # finally commit the changes
    psi.commit()
예제 #5
0
def set_datastore_active_flag(model, data_dict, flag):
    '''
    Set appropriate datastore_active flag on CKAN resource.

    Called after creation or deletion of DataStore table.

    The flag is written straight into the resource's ``extras`` (and those
    of its current revision) in the database, then the package document
    stored in the search index is patched and re-indexed.

    :param model: object exposing ``Session``, ``resource_table``,
        ``resource_revision_table``, ``Resource`` and ``ResourceRevision``
    :param data_dict: dictionary containing ``resource_id``
    :param flag: value to store under ``datastore_active``
    :raises KeyError: if ``data_dict`` has no ``resource_id``
    '''
    update_dict = {'datastore_active': flag}
    resource_id = data_dict['resource_id']

    # get extras(for entity update) and package_id(for search index update)
    res_query = model.Session.query(
        model.resource_table.c.extras,
        model.resource_table.c.package_id).filter(
            model.Resource.id == resource_id)
    # Query.one() raises unless exactly one row matches.
    extras, package_id = res_query.one()

    # update extras in database for record and its revision
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)
    model.Session.query(model.resource_revision_table).filter(
        model.ResourceRevision.id == resource_id,
        # ``current is True`` is a Python identity test that is always
        # False for a column attribute; ``.is_(True)`` emits the intended
        # SQL condition so the current revision really gets updated.
        model.ResourceRevision.current.is_(True)).update(
            {'extras': extras}, synchronize_session=False)

    model.Session.commit()

    # get package with updated resource from solr
    # find changed resource, patch it and reindex package
    psi = search.PackageSearchIndex()
    solr_query = search.PackageSearchQuery()
    q = {
        'q': 'id:"{0}"'.format(package_id),
        'fl': 'data_dict',
        'wt': 'json',
        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
        'rows': 1
    }
    for record in solr_query.run(q)['results']:
        solr_data_dict = json.loads(record['data_dict'])
        for resource in solr_data_dict['resources']:
            if resource['id'] == resource_id:
                resource.update(update_dict)
                psi.index_package(solr_data_dict)
                break
예제 #6
0
파일: action.py 프로젝트: frafra/ckan
def set_datastore_active_flag(
        context: Context, data_dict: dict[str, Any], flag: bool):
    '''
    Record on a CKAN resource whether it has a DataStore table.

    Meant to run right after a DataStore table is created or deleted.
    The ``datastore_active`` extra is written directly to the resource row
    and the owning package is then re-indexed with the patched resource.
    ``NotAuthorized`` / ``NotFound`` raised while doing the re-index are
    logged rather than propagated.
    '''
    # The resource extra is changed directly to sidestep a race
    # condition; issue #3245 has the details and the plan for a
    # better fix.
    model = context['model']
    resource_id = data_dict['resource_id']
    update_dict = {'datastore_active': flag}

    # Fetch the extras (to update the row) together with the package id
    # (to update the search index).
    columns = (
        model.resource_table.c.extras,
        model.resource_table.c.package_id,
    )
    res_query = model.Session.query(*columns).filter(
        model.Resource.id == resource_id)
    extras, package_id = res_query.one()

    # Persist the patched extras on the resource record.
    extras.update(update_dict)
    res_query.update({'extras': extras}, synchronize_session=False)

    model.Session.commit()

    # Load the package through package_show, patch the first resource
    # with a matching id and push the package to the index.
    psi = search.PackageSearchIndex()
    try:
        package_show = p.toolkit.get_action('package_show')
        _data_dict = package_show(context, {'id': package_id})
        changed = next(
            (res for res in _data_dict['resources']
             if res['id'] == resource_id),
            None)
        if changed is not None:
            changed.update(update_dict)
            psi.index_package(_data_dict)
    except (logic.NotAuthorized, logic.NotFound) as e:
        log.error(e.message)
예제 #7
0
    def setup_class(cls):
        '''Prepare the shared fixtures, skipping when Solr is unreachable.

        Stores a Solr connection, the ``site_id`` filter-query fragment, a
        package search index and a minimal package dictionary on the class.
        '''
        solr_up = search.is_available()
        if not solr_up:
            raise nose.SkipTest('Solr not reachable')

        cls.solr_client = search.make_connection()

        site_id = config['ckan.site_id']
        cls.fq = " +site_id:\"%s\" " % site_id

        cls.package_index = search.PackageSearchIndex()

        # Two separate clock reads, one per timestamp field.
        created_at = datetime.datetime.now().isoformat()
        modified_at = datetime.datetime.now().isoformat()

        package = {
            'id': 'test-index',
            'name': 'monkey',
            'title': 'Monkey',
            'state': 'active',
            'private': False,
            'type': 'dataset',
            'owner_org': None,
        }
        package['metadata_created'] = created_at
        package['metadata_modified'] = modified_at
        cls.base_package_dict = package
예제 #8
0
                        if existing_tag:
                            TagMultilang.persist(
                                {
                                    'id': existing_tag.id,
                                    'name': tag_name,
                                    'text': tag_localized_name
                                }, tag_lang)
                            log.info(
                                '::::::::: OBJECT TAG PERSISTED SUCCESSFULLY :::::::::'
                            )

        # Updating Solr Index
        if package_dict:
            log.info("::: UPDATING SOLR INDEX :::")
            # solr update here
            psi = search.PackageSearchIndex()

            # update the solr index in batches
            BATCH_SIZE = 50

            def process_solr(q):
                # update the solr index for the query
                query = search.PackageSearchQuery()
                q = {
                    'q': q,
                    'fl': 'data_dict',
                    'wt': 'json',
                    'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
                    'rows': BATCH_SIZE
                }
예제 #9
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called multiple
    times to initially insert more data, add fields, change the aliases or indexes
    as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional). An optional ``retention``
        entry must be an integer from 1 to 100.
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing
    aliases or constraints. Setting ``records`` appends the provided records to the resource.

    **Results:**

    :returns: The newly created data object. Nothing is returned when
        ``resource`` contains a ``url``: the resource is then handed to the
        datapusher instead of being created here.
    :rtype: dictionary

    :raises ValidationError: on schema errors, on conflicting or missing
        ``resource``/``resource_id``, on an invalid ``retention`` or alias,
        when the datapusher is required but not loaded, and on invalid data

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # records and resource are kept out of schema validation and put back
    # untouched afterwards
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_ts_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        if 'retention' in data_dict['resource']:
            # Only conversion failures are treated as invalid input; the
            # previous bare ``except`` also swallowed unrelated errors
            # such as KeyboardInterrupt.
            try:
                retention = int(data_dict['resource']['retention'])
            except (TypeError, ValueError, OverflowError):
                retention = None
            if retention is None or not 1 <= retention <= 100:
                raise p.toolkit.ValidationError({'resource': [
                    'Retention must be an integer from 1-100']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            if not p.plugin_loaded('datapusher'):
                raise p.toolkit.ValidationError({'resource': [
                    'The datapusher has to be enabled.']})
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': resource_dict['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        else:
            # no need to set the full url because it will be set in before_show
            resource_dict['url_type'] = 'datastore'
            p.toolkit.get_action('resource_update')(context, resource_dict)
    else:
        if not data_dict.pop('force', False):
            resource_id = data_dict['resource_id']
            _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    legacy_mode = 'ckan.datastore.read_url' not in config
    if not legacy_mode and resource.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(unicode(err))

    # Set the datastore_active flag on the resource if necessary
    if resource.extras.get('datastore_active') is not True:
        log.debug(
            'Setting datastore_active=True on resource {0}'.format(resource.id)
        )
        # issue #3245: race condition
        update_dict = {'datastore_active': True}

        # get extras(for entity update) and package_id(for search index update)
        res_query = model.Session.query(
            model.resource_table.c.extras,
            model.resource_table.c.package_id
        ).filter(
            model.Resource.id == data_dict['resource_id']
        )
        extras, package_id = res_query.one()

        # update extras in database for record and its revision
        extras.update(update_dict)
        res_query.update({'extras': extras}, synchronize_session=False)

        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == data_dict['resource_id'],
            # ``current is True`` is a Python identity test that is always
            # False for a column attribute; ``.is_(True)`` emits the
            # intended SQL condition.
            model.ResourceRevision.current.is_(True)
        ).update({'extras': extras}, synchronize_session=False)

        model.Session.commit()

        # get package with updated resource from solr
        # find changed resource, patch it and reindex package
        psi = search.PackageSearchIndex()
        solr_query = search.PackageSearchQuery()
        q = {
            'q': 'id:"{0}"'.format(package_id),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': 1
        }
        for record in solr_query.run(q)['results']:
            solr_data_dict = json.loads(record['data_dict'])
            # distinct name so the model ``resource`` above is not clobbered
            for solr_resource in solr_data_dict['resources']:
                if solr_resource['id'] == data_dict['resource_id']:
                    solr_resource.update(update_dict)
                    psi.index_package(solr_data_dict)
                    break

    # strip internal keys before handing the result back to the caller
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url', None)

    datastore_helpers.remove_autogen(result)

    return result
예제 #10
0
파일: update.py 프로젝트: detanxx/ckan
def _bulk_update_dataset(context: Context, data_dict: DataDict,
                         update_dict: dict[str, Any]):
    '''Shared implementation of the organization bulk-update actions.

    Applies ``update_dict`` to the packages listed in
    ``data_dict['datasets']`` that are owned by ``data_dict['org_id']``,
    records a "changed" activity for every listed dataset and finally
    patches and re-indexes the stored search documents in batches.
    '''
    model = context['model']
    org_id = data_dict.get('org_id')
    datasets = data_dict.get('datasets', [])

    owned_packages = model.Session.query(model.package_table).filter(
        # type_ignore_reason: incomplete SQLAlchemy types
        model.Package.id.in_(datasets),  # type: ignore
        model.Package.owner_org == org_id,
    )
    owned_packages.update(update_dict, synchronize_session=False)

    # Activity stream entries for the bulk operation
    acting_user = model.User.by_name(context['user'])
    user_id = acting_user.id if acting_user else 'not logged in'
    for dataset_id in datasets:
        package = model.Package.get(dataset_id)
        assert package
        model.Session.add(package.activity_stream_item('changed', user_id))
    model.Session.commit()

    # Bring the search index in line with the database.
    psi = search.PackageSearchIndex()

    # number of ids sent to solr per request
    BATCH_SIZE = 50

    def process_solr(solr_q: str):
        # Re-index every matching package that belongs to the organization.
        q_dict = {
            'q': solr_q,
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get_value('ckan.site_id'),
            'rows': BATCH_SIZE
        }
        found = search.PackageSearchQuery().run(q_dict)['results']

        for hit in found:
            pkg_dict = json.loads(hit['data_dict'])
            if pkg_dict['owner_org'] != org_id:
                continue
            pkg_dict.update(update_dict)
            psi.index_package(pkg_dict, defer_commit=True)

    pending = []
    for dataset_id in datasets:
        pending.append('id:"%s"' % (dataset_id))
        if len(pending) == BATCH_SIZE:
            process_solr(' OR '.join(pending))
            pending = []
    # whatever is left over forms the last, smaller batch
    if pending:
        process_solr(' OR '.join(pending))

    # one commit for everything queued above
    psi.commit()
예제 #11
0
 def __init__(self, name=None):
     """Set the fixed instance name and create a package search indexer.

     ``name`` is accepted but not used here.
     """
     self.name = 'privatedatasets'
     package_indexer = search.PackageSearchIndex()
     self.indexer = package_indexer
예제 #12
0
 def __init__(self, name=None):
     """Create the package search indexer used by this instance.

     ``name`` is accepted but not used here.
     """
     package_indexer = search.PackageSearchIndex()
     self.indexer = package_indexer
예제 #13
0
 def __init__(self, name=None):
     """Set the fixed instance name and create a package search indexer.

     ``name`` is accepted but not used here.
     """
     self.name = 'dcatAmsterdam'
     package_indexer = search.PackageSearchIndex()
     self.indexer = package_indexer
예제 #14
0
def update_product_geo(context, data_dict):
    # noinspection PyUnresolvedReferences
    """
    Assign geo codes to a product and derive its geo levels from them.

    The product is located with a ``product_id_new`` search.  Its
    ``geolevel_codes`` are recomputed from the given codes; the codes
    themselves go to the geodescriptor service for data products and onto
    the package for every other product type.  When the geo levels did not
    change the package is only re-indexed, otherwise it is updated.

    :param productId: ID of the product to update.
    :type productId: str

    :param dguids: geo codes, as a list or one ``;``-separated string
    :type dguids: list of strings

    :return: updated package
    :rtype: dict

    :raises ValidationError: if a code is not a dguid, or if the product
        search matches no record or more than one record
    """
    product_id = _get_or_bust(data_dict, 'productId')
    dguids = _get_or_bust(data_dict, 'dguids')

    lc = ckanapi.LocalCKAN(context=context)

    # A single string carries the codes separated by semicolons.
    if isinstance(dguids, basestring):
        dguids = [piece.strip() for piece in dguids.split(';')]

    for candidate in dguids:
        if stcndm_helpers.is_dguid(candidate):
            continue
        _msg = u'Expecting dguid, got {code} instead'.format(code=candidate)
        raise _ValidationError({u'geodescriptor_codes': _msg})

    search_q = 'product_id_new:{product_id}'.format(product_id=product_id)
    response = lc.action.package_search(q=search_q)

    # Exactly one product must match.
    match_count = response['count']
    if match_count == 0:
        raise _ValidationError(('Record not found.',))
    if match_count > 1:
        raise _ValidationError(
            ('More than one record identified with these values. '
             'Please contact CKAN IT',)
        )

    pkg_dict = response['results'][0]
    old_geolevel_codes = pkg_dict.get(u'geolevel_codes', [])
    new_geolevel_codes = list(
        {stcndm_helpers.get_geolevel(code) for code in dguids}
    )
    pkg_dict['geolevel_codes'] = new_geolevel_codes

    if pkg_dict['product_type_code'] in VALID_DATA_TYPES:
        # Data products can have tens of thousands of geodescriptors, so
        # they live in the geodescriptor service rather than directly on
        # the package.
        geo.clear_geodescriptors_for_package(pkg_dict['product_id_new'])
        for geo_code in dguids:
            geo.update_relationship(pkg_dict['product_id_new'], geo_code)
    else:
        # Every other product type carries the geodescriptors on the
        # package itself.
        pkg_dict['geodescriptor_codes'] = dguids

    # NOTE(review): this is an order-sensitive list comparison against a
    # list built from a set; confirm that is intended.
    # NOTE(review): on the re-index path ``pkg_dict`` is not saved, so
    # ``geodescriptor_codes`` assigned above looks unpersisted - verify.
    if old_geolevel_codes != new_geolevel_codes:
        # the geo levels changed: update the package
        lc.action.package_update(**pkg_dict)
    else:
        # unchanged geo levels: force a re-index of the package so the
        # dguids make it into solr
        query = search.PackageSearchQuery()
        q = {
            'q': 'id:{id}'.format(id=pkg_dict['id']),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id')
        }
        first_hit = query.run(q)['results'][0]
        pkg_to_index = json.loads(first_hit['data_dict'])
        psi = search.PackageSearchIndex()
        psi.index_package(pkg_to_index)

    return lc.action.package_show(id=pkg_dict['id'])