Exemple #1
0
def datastore_create(context, data_dict):
    '''Create or extend the datastore table behind a resource.

    JSON data posted to this action is stored against the given resource.
    Besides the initial creation, the same call is used to alter the table,
    its aliases and its indexes, and to insert records in bulk. It may be
    called repeatedly to insert more data, add fields, or change the
    aliases, indexes and primary keys.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param aliases: names for read only aliases of the resource.
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata.
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key
    :type primary_key: list or comma separated string
    :param indexes: indexes on table
    :type indexes: list or comma separated string

    Note that passing ``aliases``, ``indexes`` or ``primary_key`` replaces the
    existing aliases or constraints, whereas ``records`` are appended to the
    resource.

    **Results:**

    :returns: The newly created data object, without its ``id`` and
        ``connection_url`` entries.
    :rtype: dictionary

    '''
    default_schema = dsschema.datastore_create_schema()
    schema = context.get('schema', default_schema)

    # The records bypass schema validation: they are taken out first and
    # re-attached to the validated dictionary afterwards.
    raw_records = data_dict.pop('records', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if raw_records:
        data_dict['records'] = raw_records
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    write_url = pylons.config['ckan.datastore.write_url']
    data_dict['connection_url'] = write_url

    # Reject the request on the first alias that is not a valid table name.
    for name in db._get_list(data_dict.get('aliases', [])):
        if db._is_valid_table_name(name):
            continue
        message = '"{0}" is not a valid alias name'.format(name)
        raise p.toolkit.ValidationError({'alias': [message]})

    created = db.create(context, data_dict)
    # Internal bookkeeping keys are not part of the API response.
    created.pop('id', None)
    created.pop('connection_url')
    return created
Exemple #2
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called multiple
    times to initially insert more data, add fields, change the aliases or indexes
    as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    handed to the ``datapusher_submit`` action and this action returns
    ``None`` without creating the table itself.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing
    aliases or constraints. Setting ``records`` appends the provided records to the resource.

    **Results:**

    :returns: The newly created data object (without ``id``, ``private`` and
        ``connection_url``), or ``None`` when the resource was submitted to
        the datapusher.
    :rtype: dictionary
    :raises ValidationError: on schema errors, when both or neither of
        ``resource`` and ``resource_id`` are given, when a ``resource`` with
        a ``url`` is given but the datapusher plugin is not loaded, when an
        alias is not a valid table name, or when the database layer reports
        invalid data.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # ``records`` and ``resource`` are taken out before validation and
    # re-attached afterwards, so the schema is never applied to them.
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError(
            {'resource': ['resource cannot be used with resource_id']})

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError(
            {'resource_id': ['resource_id or resource required']})

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        # Check the datapusher requirement before resource_create runs, so
        # that a rejected request does not leave a resource behind.
        if has_url and not p.plugin_loaded('datapusher'):
            raise p.toolkit.ValidationError(
                {'resource': ['The datapusher has to be enabled.']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        res = p.toolkit.get_action('resource_create')(context,
                                                      data_dict['resource'])
        data_dict['resource_id'] = res['id']

        # create resource from file
        if has_url:
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': res['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        # no need to set the full url because it will be set in before_show
        res['url_type'] = 'datastore'
        p.toolkit.get_action('resource_update')(context, res)
    elif not data_dict.pop('force', False):
        # Existing resource: the read-only check is skipped only when the
        # caller explicitly forces the edit.
        _check_read_only(context, data_dict['resource_id'])

    data_dict['connection_url'] = pylons.config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError(
                {'alias': [u'"{0}" is not a valid alias name'.format(alias)]})

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    # Without a configured read url (legacy mode) the table is never
    # flagged as private.
    legacy_mode = 'ckan.datastore.read_url' not in pylons.config
    if not legacy_mode and resource.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(str(err))

    # Internal bookkeeping keys are not part of the API response.
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url')
    return result
Exemple #3
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called multiple
    times to initially insert more data, add fields, change the aliases or indexes
    as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    handed to the ``datapusher_submit`` action and this action returns
    ``None`` without creating the table itself.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing
    aliases or constraints. Setting ``records`` appends the provided records to the resource.

    **Results:**

    :returns: The newly created data object (without ``id``, ``private`` and
        ``connection_url``), or ``None`` when the resource was submitted to
        the datapusher.
    :rtype: dictionary
    :raises ValidationError: on schema errors, when both or neither of
        ``resource`` and ``resource_id`` are given, when a ``resource`` with
        a ``url`` is given but the datapusher plugin is not loaded, or when
        an alias is not a valid table name.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # ``records`` and ``resource`` are taken out before validation and
    # re-attached afterwards, so the schema is never applied to them.
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        # Check the datapusher requirement before resource_create runs, so
        # that a rejected request does not leave a resource behind.
        if has_url and not p.plugin_loaded('datapusher'):
            raise p.toolkit.ValidationError({'resource': [
                'The datapusher has to be enabled.']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        res = p.toolkit.get_action('resource_create')(context,
                                                      data_dict['resource'])
        data_dict['resource_id'] = res['id']

        # create resource from file
        if has_url:
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': res['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        # no need to set the full url because it will be set in before_show
        res['url_type'] = 'datastore'
        p.toolkit.get_action('resource_update')(context, res)
    elif not data_dict.pop('force', False):
        # Existing resource: the read-only check is skipped only when the
        # caller explicitly forces the edit.
        _check_read_only(context, data_dict['resource_id'])

    data_dict['connection_url'] = pylons.config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])
    # Without a configured read url (legacy mode) the table is never
    # flagged as private.
    legacy_mode = 'ckan.datastore.read_url' not in pylons.config
    if not legacy_mode and resource.resource_group.package.private:
        data_dict['private'] = True

    result = db.create(context, data_dict)
    # Internal bookkeeping keys are not part of the API response.
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url')
    return result
Exemple #4
0
def datastore_create(context, data_dict):
    '''Create or extend the DataStore table behind a resource.

    JSON data posted to this action is stored against the given resource.
    Besides the initial creation, the same call is used to alter the table,
    its aliases and its indexes, and to insert records in bulk. It may be
    called repeatedly to insert more data, add fields, or change the
    aliases, indexes and primary keys.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Note that passing ``aliases``, ``indexes`` or ``primary_key`` replaces the
    existing aliases or constraints, whereas ``records`` are appended to the
    resource.

    **Results:**

    :returns: The newly created data object, without its ``id``, ``private``
        and ``connection_url`` entries.
    :rtype: dictionary

    '''
    default_schema = dsschema.datastore_create_schema()
    schema = context.get('schema', default_schema)

    # The records bypass schema validation: they are taken out first and
    # re-attached to the validated dictionary afterwards.
    raw_records = data_dict.pop('records', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if raw_records:
        data_dict['records'] = raw_records
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    write_url = pylons.config['ckan.datastore.write_url']
    data_dict['connection_url'] = write_url

    # Reject the request on the first alias that is not a valid table name.
    for name in db._get_list(data_dict.get('aliases', [])):
        if db._is_valid_table_name(name):
            continue
        raise p.toolkit.ValidationError({
            'alias': ['"{0}" is not a valid alias name'.format(name)]
        })

    # Flag the table as private when the owning package is private; this
    # only applies when a read url is configured (i.e. not in legacy mode).
    model = _get_or_bust(context, 'model')
    res_obj = model.Resource.get(data_dict['resource_id'])
    read_url_configured = 'ckan.datastore.read_url' in pylons.config
    if read_url_configured and res_obj.resource_group.package.private:
        data_dict['private'] = True

    created = db.create(context, data_dict)
    # Internal bookkeeping keys are not part of the API response.
    created.pop('id', None)
    created.pop('private', None)
    created.pop('connection_url')
    return created
Exemple #5
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called
    multiple times to initially insert more data, add fields, change the
    aliases or indexes as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the
    ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    handed to the ``datapusher_submit`` action and this action returns
    ``None`` without creating the table itself.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored
                        against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]
                    (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string
    :param triggers: trigger functions to apply to this table on update/insert.
        functions may be created with
        :meth:`~ckanext.datastore.logic.action.datastore_function_create`.
        eg: [
        {"function": "trigger_clean_reference"},
        {"function": "trigger_check_codes"}]
    :type triggers: list of dictionaries
    :param calculate_record_count: updates the stored count of records, used to
        optimize datastore_search in combination with the
        `total_estimation_threshold` parameter. If doing a series of requests
        to change a resource, you only need to set this to True on the last
        request.
    :type calculate_record_count: bool (optional, default: False)

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key``
    replaces the existing aliases or constraints. Setting ``records`` appends
    the provided records to the resource.

    **Results:**

    :returns: The newly created data object, excluding ``records`` passed,
        or ``None`` when the resource was submitted to the datapusher.
    :rtype: dictionary
    :raises ValidationError: on schema errors, when both or neither of
        ``resource`` and ``resource_id`` are given, when a ``resource`` with
        a ``url`` is given but the datapusher plugin is not loaded, when an
        alias is not a valid table name, or when the backend reports invalid
        data.

    '''
    backend = DatastoreBackend.get_active_backend()
    schema = context.get('schema', dsschema.datastore_create_schema())
    # ``records`` and ``resource`` are taken out before validation and
    # re-attached afterwards, so the schema is never applied to them.
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError(
            {'resource': ['resource cannot be used with resource_id']})

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError(
            {'resource_id': ['resource_id or resource required']})

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        # Check the datapusher requirement before resource_create runs, so
        # that a rejected request does not leave a resource behind.
        if has_url and not p.plugin_loaded('datapusher'):
            raise p.toolkit.ValidationError(
                {'resource': ['The datapusher has to be enabled.']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            p.toolkit.get_action('datapusher_submit')(
                context, {
                    'resource_id': resource_dict['id'],
                    'set_url_type': True
                })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        # no need to set the full url because it will be set in before_show
        resource_dict['url_type'] = 'datastore'
        p.toolkit.get_action('resource_update')(context, resource_dict)
    elif not data_dict.pop('force', False):
        # Existing resource: the read-only check is skipped only when the
        # caller explicitly forces the edit.
        _check_read_only(context, data_dict['resource_id'])

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not datastore_helpers.is_valid_table_name(alias):
            raise p.toolkit.ValidationError(
                {'alias': [u'"{0}" is not a valid alias name'.format(alias)]})

    try:
        result = backend.create(context, data_dict)
    except InvalidDataError as err:
        raise p.toolkit.ValidationError(text_type(err))

    if data_dict.get('calculate_record_count', False):
        backend.calculate_record_count(data_dict['resource_id'])

    # Set the datastore_active flag on the resource if necessary
    model = _get_or_bust(context, 'model')
    resobj = model.Resource.get(data_dict['resource_id'])
    if resobj.extras.get('datastore_active') is not True:
        log.debug('Setting datastore_active=True on resource {0}'.format(
            resobj.id))
        set_datastore_active_flag(model, data_dict, True)

    # Internal keys and the submitted records are not echoed back.
    result.pop('id', None)
    result.pop('connection_url', None)
    result.pop('records', None)
    return result
Exemple #6
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called multiple
    times to initially insert more data, add fields, change the aliases or indexes
    as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    handed to the ``datapusher_submit`` action and this action returns
    ``None`` without creating the table itself.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]  (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing
    aliases or constraints. Setting ``records`` appends the provided records to the resource.

    **Results:**

    :returns: The newly created data object (without ``id``, ``private`` and
        ``connection_url``), or ``None`` when the resource was submitted to
        the datapusher.
    :rtype: dictionary
    :raises ValidationError: on schema errors, when both or neither of
        ``resource`` and ``resource_id`` are given, when a ``resource`` with
        a ``url`` is given but the datapusher plugin is not loaded, when an
        alias is not a valid table name, or when the database layer reports
        invalid data.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # ``records`` and ``resource`` are taken out before validation and
    # re-attached afterwards, so the schema is never applied to them.
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError(
            {'resource': ['resource cannot be used with resource_id']})

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError(
            {'resource_id': ['resource_id or resource required']})

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']

        # Check the datapusher requirement before resource_create runs, so
        # that a rejected request does not leave a resource behind.
        if has_url and not p.plugin_loaded('datapusher'):
            raise p.toolkit.ValidationError(
                {'resource': ['The datapusher has to be enabled.']})

        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            p.toolkit.get_action('datapusher_submit')(
                context, {
                    'resource_id': resource_dict['id'],
                    'set_url_type': True
                })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        # no need to set the full url because it will be set in before_show
        resource_dict['url_type'] = 'datastore'
        p.toolkit.get_action('resource_update')(context, resource_dict)
    elif not data_dict.pop('force', False):
        # Existing resource: the read-only check is skipped only when the
        # caller explicitly forces the edit.
        _check_read_only(context, data_dict['resource_id'])

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError(
                {'alias': [u'"{0}" is not a valid alias name'.format(alias)]})

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resource_obj = model.Resource.get(data_dict['resource_id'])
    # Without a configured read url (legacy mode) the table is never
    # flagged as private.
    legacy_mode = 'ckan.datastore.read_url' not in config
    if not legacy_mode and resource_obj.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(unicode(err))

    # Set the datastore_active flag on the resource if necessary
    if resource_obj.extras.get('datastore_active') is not True:
        log.debug('Setting datastore_active=True on resource {0}'.format(
            resource_obj.id))
        # issue #3245: race condition
        # The extras column is rewritten with direct queries below instead
        # of going through the resource_update action.
        update_dict = {'datastore_active': True}

        # get extras(for entity update) and package_id(for search index update)
        res_query = model.Session.query(
            model.resource_table.c.extras,
            model.resource_table.c.package_id).filter(
                model.Resource.id == data_dict['resource_id'])
        extras, package_id = res_query.one()

        # update extras in database for record and its revision
        extras.update(update_dict)
        res_query.update({'extras': extras}, synchronize_session=False)

        # The comparison must be ``==`` so that it builds a SQL expression;
        # ``is True`` is evaluated by Python to a plain False and the
        # filter would never match the current revision row.
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == data_dict['resource_id'],
            model.ResourceRevision.current == True  # noqa: E712
        ).update({'extras': extras}, synchronize_session=False)

        model.Session.commit()

        # get package with  updated resource from solr
        # find changed resource, patch it and reindex package
        psi = search.PackageSearchIndex()
        solr_query = search.PackageSearchQuery()
        q = {
            'q': 'id:"{0}"'.format(package_id),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': 1
        }
        for record in solr_query.run(q)['results']:
            solr_data_dict = json.loads(record['data_dict'])
            # Distinct loop name so the model object above is not shadowed.
            for solr_resource in solr_data_dict['resources']:
                if solr_resource['id'] == data_dict['resource_id']:
                    solr_resource.update(update_dict)
                    psi.index_package(solr_data_dict)
                    break

    # Internal bookkeeping keys are not part of the API response.
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url')
    return result
Exemple #7
0
def datastore_create(context, data_dict):
    '''Create or alter a DataStore table for a resource.

    JSON data posted to this action is stored against a resource. Besides
    creating a table, the same call can alter an existing table, change
    its aliases and indexes, and insert records in bulk, so it may be
    invoked repeatedly to add more data, add fields, or change the
    aliases, indexes or primary keys.

    Pass ``resource`` (with a valid ``package_id``) instead of
    ``resource_id`` to create a CKAN resource and an empty datastore
    resource in one step. If that ``resource`` carries a ``url``, the
    resource is created and handed to the datapusher (which must be
    enabled) to be filled from the file's content.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: id of the resource the data is stored against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource (the read-only
        check is skipped)
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]
                    (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string
    :param triggers: trigger functions to apply to this table on update/insert.
        functions may be created with
        :meth:`~ckanext.datastore.logic.action.datastore_function_create`.
        eg: [
        {"function": "trigger_clean_reference"},
        {"function": "trigger_check_codes"}]
    :type triggers: list of dictionaries
    :param calculate_record_count: updates the stored count of records, used to
        optimize datastore_search in combination with the
        `total_estimation_threshold` parameter. If doing a series of requests
        to change a resource, you only need to set this to True on the last
        request.
    :type calculate_record_count: bool (optional, default: False)

    Note that ``aliases``, ``indexes`` and ``primary_key`` replace the
    existing aliases or constraints, whereas ``records`` are appended to
    the resource.

    **Results:**

    :returns: The newly created data object, excluding ``records`` passed.
        Nothing is returned when ``resource`` contains a ``url`` and the
        work is handed over to the datapusher.
    :rtype: dictionary

    '''
    backend = DatastoreBackend.get_active_backend()
    default_schema = dsschema.datastore_create_schema()
    schema = context.get('schema', default_schema)

    # ``records`` and ``resource`` are held back from schema validation and
    # re-attached afterwards, but only when they are non-empty.
    held_back = [
        (key, data_dict.pop(key, None)) for key in ('records', 'resource')
    ]
    data_dict, errors = _validate(data_dict, schema, context)
    for key, value in held_back:
        if value:
            data_dict[key] = value
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    # Exactly one of ``resource`` / ``resource_id`` has to be supplied.
    has_resource = 'resource' in data_dict
    has_resource_id = 'resource_id' in data_dict
    if has_resource and has_resource_id:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })
    if not (has_resource or has_resource_id):
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if not has_resource:
        # Existing resource: honour the read-only check unless forced.
        if not data_dict.pop('force', False):
            _check_read_only(context, data_dict['resource_id'])
    else:
        new_resource = data_dict['resource']
        has_url = 'url' in new_resource
        # A datastore only resource does not have a url in the db
        new_resource.setdefault('url', '_datastore_only_resource')
        create_resource = p.toolkit.get_action('resource_create')
        resource_dict = create_resource(context, new_resource)
        data_dict['resource_id'] = resource_dict['id']

        if has_url:
            # The resource is to be filled from a file by the datapusher.
            if not p.plugin_loaded('datapusher'):
                raise p.toolkit.ValidationError({'resource': [
                    'The datapusher has to be enabled.']})
            submit = p.toolkit.get_action('datapusher_submit')
            submit(context, {
                'resource_id': resource_dict['id'],
                'set_url_type': True
            })
            # The datastore resource gets overwritten anyway, so there is
            # nothing to create here.
            return

        # Empty datastore resource: the full url is not needed because it
        # will be set in before_show.
        resource_dict['url_type'] = 'datastore'
        p.toolkit.get_action('resource_update')(context, resource_dict)

    # Reject the request on the first alias that is not a valid table name.
    for alias in datastore_helpers.get_list(data_dict.get('aliases', [])):
        if not datastore_helpers.is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    try:
        result = backend.create(context, data_dict)
    except InvalidDataError as err:
        raise p.toolkit.ValidationError(text_type(err))

    if data_dict.get('calculate_record_count', False):
        backend.calculate_record_count(data_dict['resource_id'])

    # Flag the resource as datastore_active unless it already is.
    model = _get_or_bust(context, 'model')
    resobj = model.Resource.get(data_dict['resource_id'])
    already_active = resobj.extras.get('datastore_active') is True
    if not already_active:
        log.debug(
            'Setting datastore_active=True on resource {0}'.format(resobj.id)
        )
        set_datastore_active_flag(model, data_dict, True)

    # Strip keys that are not part of the returned data object.
    for key in ('id', 'connection_url', 'records'):
        result.pop(key, None)
    return result
Exemple #8
0
def datastore_create(context, data_dict):
    '''Adds a new table to the DataStore.

    The datastore_create action allows you to post JSON data to be
    stored against a resource. This endpoint also supports altering tables,
    aliases and indexes and bulk insertion. This endpoint can be called
    multiple times to initially insert more data, add fields, change the
    aliases or indexes as well as the primary keys.

    To create an empty datastore resource and a CKAN resource at the same time,
    provide ``resource`` with a valid ``package_id`` and omit the
    ``resource_id``.

    If you want to create a datastore resource from the content of a file,
    provide ``resource`` with a valid ``url``. In that case the resource is
    submitted to the datapusher (which has to be enabled) and this action
    returns nothing.

    See :ref:`fields` and :ref:`records` for details on how to lay out records.

    :param resource_id: resource id that the data is going to be stored
                        against.
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param resource: resource dictionary that is passed to
        :meth:`~ckan.logic.action.create.resource_create`.
        Use instead of ``resource_id`` (optional)
    :type resource: dictionary
    :param aliases: names for read only aliases of the resource. (optional)
    :type aliases: list or comma separated string
    :param fields: fields/columns and their extra metadata. (optional)
    :type fields: list of dictionaries
    :param records: the data, eg: [{"dob": "2005", "some_stuff": ["a", "b"]}]
                    (optional)
    :type records: list of dictionaries
    :param primary_key: fields that represent a unique key (optional)
    :type primary_key: list or comma separated string
    :param indexes: indexes on table (optional)
    :type indexes: list or comma separated string

    Please note that setting the ``aliases``, ``indexes`` or ``primary_key``
    replaces the existing aliases or constraints. Setting ``records`` appends
    the provided records to the resource.

    **Results:**

    :returns: The newly created data object.
    :rtype: dictionary
    :raises ValidationError: if schema validation fails, if both or neither
        of ``resource`` and ``resource_id`` are given, if a file based
        resource is requested while the datapusher plugin is not loaded,
        if an alias is not a valid table name, or if the database layer
        reports invalid data.

    '''
    schema = context.get('schema', dsschema.datastore_create_schema())
    # ``records`` and ``resource`` are kept out of schema validation and put
    # back afterwards (only when non-empty).
    records = data_dict.pop('records', None)
    resource = data_dict.pop('resource', None)
    data_dict, errors = _validate(data_dict, schema, context)
    if records:
        data_dict['records'] = records
    if resource:
        data_dict['resource'] = resource
    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_create', context, data_dict)

    if 'resource' in data_dict and 'resource_id' in data_dict:
        raise p.toolkit.ValidationError({
            'resource': ['resource cannot be used with resource_id']
        })

    if 'resource' not in data_dict and 'resource_id' not in data_dict:
        raise p.toolkit.ValidationError({
            'resource_id': ['resource_id or resource required']
        })

    if 'resource' in data_dict:
        has_url = 'url' in data_dict['resource']
        # A datastore only resource does not have a url in the db
        data_dict['resource'].setdefault('url', '_datastore_only_resource')
        resource_dict = p.toolkit.get_action('resource_create')(
            context, data_dict['resource'])
        data_dict['resource_id'] = resource_dict['id']

        # create resource from file
        if has_url:
            if not p.plugin_loaded('datapusher'):
                raise p.toolkit.ValidationError({'resource': [
                    'The datapusher has to be enabled.']})
            p.toolkit.get_action('datapusher_submit')(context, {
                'resource_id': resource_dict['id'],
                'set_url_type': True
            })
            # since we'll overwrite the datastore resource anyway, we
            # don't need to create it here
            return

        # create empty resource
        else:
            # no need to set the full url because it will be set in before_show
            resource_dict['url_type'] = 'datastore'
            p.toolkit.get_action('resource_update')(context, resource_dict)
    else:
        if not data_dict.pop('force', False):
            resource_id = data_dict['resource_id']
            _check_read_only(context, resource_id)

    data_dict['connection_url'] = config['ckan.datastore.write_url']

    # validate aliases
    aliases = datastore_helpers.get_list(data_dict.get('aliases', []))
    for alias in aliases:
        if not db._is_valid_table_name(alias):
            raise p.toolkit.ValidationError({
                'alias': [u'"{0}" is not a valid alias name'.format(alias)]
            })

    # create a private datastore resource, if necessary
    model = _get_or_bust(context, 'model')
    resobj = model.Resource.get(data_dict['resource_id'])
    legacy_mode = 'ckan.datastore.read_url' not in config
    if not legacy_mode and resobj.package.private:
        data_dict['private'] = True

    try:
        result = db.create(context, data_dict)
    except db.InvalidDataError as err:
        raise p.toolkit.ValidationError(unicode(err))

    # Set the datastore_active flag on the resource if necessary
    if resobj.extras.get('datastore_active') is not True:
        log.debug(
            'Setting datastore_active=True on resource {0}'.format(resobj.id)
        )
        # issue #3245: race condition
        update_dict = {'datastore_active': True}

        # get extras(for entity update) and package_id(for search index update)
        res_query = model.Session.query(
            model.resource_table.c.extras,
            model.resource_table.c.package_id
        ).filter(
            model.Resource.id == data_dict['resource_id']
        )
        extras, package_id = res_query.one()

        # update extras in database for record and its revision
        extras.update(update_dict)
        res_query.update({'extras': extras}, synchronize_session=False)

        # The column has to be compared with ``.is_(True)``: a plain Python
        # ``is True`` is evaluated before SQLAlchemy sees it and yields the
        # constant False, so no revision row would ever be updated.
        model.Session.query(model.resource_revision_table).filter(
            model.ResourceRevision.id == data_dict['resource_id'],
            model.ResourceRevision.current.is_(True)
        ).update({'extras': extras}, synchronize_session=False)

        model.Session.commit()

        # get package with updated resource from solr
        # find changed resource, patch it and reindex package
        psi = search.PackageSearchIndex()
        solr_query = search.PackageSearchQuery()
        q = {
            'q': 'id:"{0}"'.format(package_id),
            'fl': 'data_dict',
            'wt': 'json',
            'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
            'rows': 1
        }
        for record in solr_query.run(q)['results']:
            solr_data_dict = json.loads(record['data_dict'])
            # a distinct loop name avoids shadowing the model resource object
            for solr_resource in solr_data_dict['resources']:
                if solr_resource['id'] == data_dict['resource_id']:
                    solr_resource.update(update_dict)
                    psi.index_package(solr_data_dict)
                    break

    # strip internal keys from the returned data object
    result.pop('id', None)
    result.pop('private', None)
    result.pop('connection_url', None)
    return result