Example #1
def _resource_exists(context, data_dict):
    ''' Returns true if the resource exists in CKAN and in the datastore '''
    model = _get_or_bust(context, 'model')
    res_id = _get_or_bust(data_dict, 'resource_id')
    if not model.Resource.get(res_id):
        return False

    resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata"
                                        WHERE name = :id AND alias_of IS NULL'''
                                    )
    results = db.get_read_engine().execute(resources_sql, id=res_id)
    return results.rowcount > 0
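
A minimal usage sketch for this helper, assuming a running CKAN environment where `_resource_exists` is importable alongside the ORM; the wrapper function and the error message are illustrative, not part of the original module.

# Hypothetical usage sketch: guarding an action with the helper above,
# assuming `_resource_exists` is in scope and CKAN is installed.
import ckan.plugins.toolkit as toolkit
from ckan import model

def _ensure_datastore_resource(resource_id):
    '''Raise ObjectNotFound unless the resource is present in the DataStore.'''
    context = {'model': model}
    data_dict = {'resource_id': resource_id}
    if not _resource_exists(context, data_dict):
        raise toolkit.ObjectNotFound(
            toolkit._(u'Resource "{0}" was not found.'.format(resource_id)))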
Example #2
    def configure(self, config):
        self.config = config
        # check for ckan.datastore.write_url and ckan.datastore.read_url
        if 'ckan.datastore.write_url' not in config:
            error_msg = 'ckan.datastore.write_url not found in config'
            raise DatastoreException(error_msg)

        # Legacy mode means that we have no read URL. Consequently, SQL search is
        # not available and permissions do not have to be changed. In legacy mode,
        # the datastore runs on PostgreSQL prior to 9.0 (for example 8.4).
        self.legacy_mode = _is_legacy_mode(self.config)

        # Check whether users have disabled datastore_search_sql
        self.enable_sql_search = p.toolkit.asbool(
            self.config.get('ckan.datastore.sqlsearch.enabled', True))

        datapusher_formats = config.get('datapusher.formats', '').split()
        self.datapusher_formats = datapusher_formats or DEFAULT_FORMATS

        # Check whether we are running one of the paster commands which means
        # that we should ignore the following tests.
        if sys.argv[0].split(
                '/')[-1] == 'paster' and 'datastore' in sys.argv[1:]:
            log.warn('Omitting permission checks because you are '
                     'running paster commands.')
            return

        self.ckan_url = self.config['sqlalchemy.url']
        self.write_url = self.config['ckan.datastore.write_url']
        if self.legacy_mode:
            self.read_url = self.write_url
            log.warn('Legacy mode active. '
                     'The sql search will not be available.')
        else:
            self.read_url = self.config['ckan.datastore.read_url']

        self.read_engine = db.get_read_engine()
        if not model.engine_is_pg(self.read_engine):
            log.warn('We detected that you do not use a PostgreSQL '
                     'database. The DataStore will NOT work and DataStore '
                     'tests will be skipped.')
            return

        if self._is_read_only_database():
            log.warn('We detected that CKAN is running on a read '
                     'only database. Permission checks and the creation '
                     'of _table_metadata are skipped.')
        else:
            self._check_urls_and_permissions()
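
For orientation, a rough sketch of the configuration keys this `configure` method reads. The dict below only illustrates the expected shape; in a real deployment these values come from the CKAN ini file, and the URLs here are placeholders.

# Illustrative settings only; real deployments define these in the CKAN ini file.
example_config = {
    'sqlalchemy.url': 'postgresql://ckan:pass@localhost/ckan',              # main CKAN DB
    'ckan.datastore.write_url': 'postgresql://ckan:pass@localhost/datastore',
    'ckan.datastore.read_url': 'postgresql://reader:pass@localhost/datastore',
    'ckan.datastore.sqlsearch.enabled': 'true',  # toggles datastore_search_sql
    'datapusher.formats': 'csv xls xlsx tsv',    # empty string falls back to DEFAULT_FORMATS
}
# plugin_instance.configure(example_config) would then derive the read/write
# engines from these values (assuming a DatastorePlugin-like instance at hand).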
Example #3
def datastore_delete(context, data_dict):
    '''Deletes a table or a set of records from the DataStore.

    :param resource_id: resource id that the data will be deleted from. (optional)
    :type resource_id: string
    :param force: set to True to edit a read-only resource
    :type force: bool (optional, default: False)
    :param filters: filters to apply before deleting (eg {"name": "fred"}).
                   If missing delete whole table and all dependent views. (optional)
    :type filters: dictionary

    **Results:**

    :returns: Original filters sent.
    :rtype: dictionary

    '''
    schema = context.get('schema', dsschema.datastore_upsert_schema())

    # Remove any applied filters before running validation.
    filters = data_dict.pop('filters', None)
    data_dict, errors = _validate(data_dict, schema, context)

    if filters is not None:
        if not isinstance(filters, dict):
            raise p.toolkit.ValidationError(
                {'filters': ['filters must be either a dict or null.']})
        data_dict['filters'] = filters

    if errors:
        raise p.toolkit.ValidationError(errors)

    p.toolkit.check_access('datastore_delete', context, data_dict)

    if not data_dict.pop('force', False):
        resource_id = data_dict['resource_id']
        _check_read_only(context, resource_id)

    res_id = data_dict['resource_id']
    resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata"
                                        WHERE name = :id AND alias_of IS NULL'''
                                    )
    results = db.get_read_engine().execute(resources_sql, id=res_id)
    res_exists = results.rowcount > 0

    if not res_exists:
        raise p.toolkit.ObjectNotFound(
            p.toolkit._(u'Resource "{0}" was not found.'.format(res_id)))

    result = db.delete(context, data_dict)

    # Set the datastore_active flag on the resource if necessary
    model = _get_or_bust(context, 'model')
    resource = model.Resource.get(data_dict['resource_id'])

    if (not data_dict.get('filters')
            and resource.extras.get('datastore_active') is True):
        log.debug('Setting datastore_active=False on resource {0}'.format(
            resource.id))
        set_datastore_active_flag(model, data_dict, False)

    result.pop('id', None)
    return result
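
A sketch of how this action might be invoked through the plugins toolkit, for example to drop only the rows matching a filter. The context and resource id below are placeholders; an authorised user is assumed.

# Hypothetical call through the action API; the resource id is a placeholder.
import ckan.plugins.toolkit as toolkit

result = toolkit.get_action('datastore_delete')(
    {'user': 'admin'},                    # context; an authorised user is assumed
    {
        'resource_id': 'aaaa-bbbb-cccc',  # placeholder id
        'filters': {'name': 'fred'},      # delete matching rows only;
                                          # omit to drop the whole table
        'force': False,
    })
# On success, the original filters are echoed back in `result`.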
Example #4
def datastore_info(context, data_dict):
    '''
    Returns information about the data imported, such as column names
    and types.

    :param id: id of the resource we want info about
    :type id: A UUID
    :rtype: A dictionary describing the columns and their types.
    '''
    def _type_lookup(t):
        if t in ['numeric', 'integer']:
            return 'number'

        if t.startswith('timestamp'):
            return "date"

        return "text"

    p.toolkit.check_access('datastore_info', context, data_dict)

    resource_id = _get_or_bust(data_dict, 'id')
    resource = p.toolkit.get_action('resource_show')(context, {
        'id': resource_id
    })

    resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata"
                                        WHERE name = :id AND alias_of IS NULL'''
                                    )
    results = db.get_read_engine().execute(resources_sql, id=resource_id)
    res_exists = results.rowcount > 0
    if not res_exists:
        raise p.toolkit.ObjectNotFound(
            p.toolkit._(u'Resource "{0}" was not found.'.format(resource_id)))

    info = {'schema': {}, 'meta': {}}

    schema_results = None
    meta_results = None
    try:
        schema_sql = sqlalchemy.text(u'''
            SELECT column_name, data_type
            FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name = :resource_id;
        ''')
        schema_results = db.get_read_engine().execute(schema_sql,
                                                      resource_id=resource_id)
        for row in schema_results.fetchall():
            k = row[0]
            v = row[1]
            if k.startswith('_'):  # Skip internal columns (e.g. _id, _full_text)
                continue
            info['schema'][k] = _type_lookup(v)

        # resource_id is interpolated directly into this statement, so it must
        # already be validated as an existing resource; that check is done above.
        meta_sql = sqlalchemy.text(u'''
            SELECT count(_id) FROM "{0}";
        '''.format(resource_id))
        meta_results = db.get_read_engine().execute(meta_sql,
                                                    resource_id=resource_id)
        info['meta']['count'] = meta_results.fetchone()[0]
    finally:
        if schema_results:
            schema_results.close()
        if meta_results:
            meta_results.close()

    return info
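
Finally, a hedged usage sketch for this action: fetching the simplified column types and the row count for a resource. The context and the id are placeholders, and the printed output format is only illustrative.

# Hypothetical usage: fetch column types and the row count for a resource;
# the id below is a placeholder.
import ckan.plugins.toolkit as toolkit

info = toolkit.get_action('datastore_info')(
    {'user': 'admin'},              # an authorised context is assumed
    {'id': 'aaaa-bbbb-cccc'})       # placeholder resource id
print(info['meta']['count'])        # total number of rows in the table
for column, simple_type in info['schema'].items():
    print('%s: %s' % (column, simple_type))  # 'number', 'date' or 'text'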