Code example #1
def detect_deleted_rows(data_dict, connection, new_record, primary_key):
    '''This method detects the deleted rows. It only runs the detection if the dataset is being
    edited; otherwise there is nothing to do. It would be better if this information
    (edit or create) came directly from listeners (update/save listeners), as that would
    reduce the complexity in the backend.'''
    detect_dict = {}
    new_ids = [dict.get(primary_key, None) for dict in new_record]
    new_ids.sort()

    resource_id = data_dict['resource_id']

    select = u'''SELECT MAX({primary_key}) FROM {table}'''. \
        format(table=identifier(resource_id), primary_key=identifier(primary_key))
    rs = connection.execute(select)
    max_id_in_database = rs.first()[0]

    if new_ids[0] > max_id_in_database:
        # create mode, therefore nothing to detect
        reset_min_max_id()
        detect_dict['mode'] = 'create'
        return detect_dict
    else:
        # edit mode detect deleted rows
        detect_dict['mode'] = 'edit'

        find_min_and_max_id(new_ids, primary_key, resource_id, connection)

        if current_entry_properties.min_id > current_entry_properties.max_id:
            current_entry_properties.min_id = current_entry_properties.max_id - 250

        select = u'''SELECT * FROM {table} WHERE {primary_key} BETWEEN {min} AND {max} ORDER BY {primary_key}'''. \
            format(table=identifier(data_dict['resource_id']), primary_key=identifier(primary_key),
                   min=current_entry_properties.min_id, max=current_entry_properties.max_id)

        rs = connection.execute(select)
        old_record = refine_results(rs, rs.keys())

        old_ids = [int(dict.get(primary_key, None)) for dict in old_record]

        # this sets the offset for the next chunk of data
        current_entry_properties.min_id = current_entry_properties.max_id + 1

        detect_dict['old_ids'] = old_ids
        detect_dict['old_record'] = old_record

        rows_to_delete = list(set(old_ids) - set(new_ids))

        if not rows_to_delete:
            detect_dict['new_record'] = new_record
            return detect_dict

        delete_deleted_rows(rows_to_delete, connection, data_dict, primary_key)

        new_record_after_delete = [dict for dict in new_record if dict.get(primary_key, None) not in rows_to_delete]

        detect_dict['new_record'] = new_record_after_delete

        return detect_dict
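All of the examples on this page rely on the `identifier()` helper from CKAN's DataStore PostgreSQL backend to quote SQL identifiers. A minimal sketch of what it does (a close approximation, not a verbatim copy of the CKAN source):

def identifier(s):
    # Wrap the name in double quotes and escape embedded double quotes so it
    # is safe to interpolate as a PostgreSQL identifier; NUL bytes are dropped
    # because PostgreSQL rejects them.
    return u'"' + s.replace(u'"', u'""').replace(u'\0', u'') + u'"'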
Code example #2
def create_versioning_trigger(data_dict, connection):
    connection.execute(
        u'''CREATE TRIGGER {trigger}
            BEFORE INSERT OR UPDATE OR DELETE ON {table}
            FOR EACH ROW EXECUTE PROCEDURE versioning('sys_period',
                                          '{table_history}',
                                          true);'''.format(
            trigger=identifier(data_dict['resource_id'] + '_trigger'), table=identifier(data_dict['resource_id']),
            table_history=identifier(data_dict['resource_id'] + '_history')))
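The versioning() procedure called by this trigger comes from the PostgreSQL temporal_tables extension, so the trigger presupposes a one-off setup step; a hedged sketch of that step (requires superuser rights on the datastore database, issued here through the same connection):

# Install the temporal_tables extension that provides the versioning()
# trigger function; run once per database by an administrator.
connection.execute(u'CREATE EXTENSION IF NOT EXISTS temporal_tables;')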
Code example #3
File: cli.py Project: pdelboca/ckan
def permissions_sql(maindb, datastoredb, mainuser, writeuser, readuser):
    template_filename = os.path.join(
        os.path.dirname(datastore_module.__file__), u'set_permissions.sql')
    with open(template_filename) as fp:
        template = fp.read()
    return template.format(maindb=identifier(maindb),
                           datastoredb=identifier(datastoredb),
                           mainuser=identifier(mainuser),
                           writeuser=identifier(writeuser),
                           readuser=identifier(readuser))
Code example #4
File: commands.py Project: CIOIL/DataGovIL
def permissions_sql(maindb, datastoredb, mainuser, writeuser, readuser):
    template_filename = os.path.join(os.path.dirname(__file__),
                                     u'set_permissions.sql')
    with open(template_filename) as fp:
        template = fp.read()
    return template.format(
        maindb=identifier(maindb),
        datastoredb=identifier(datastoredb),
        mainuser=identifier(mainuser),
        writeuser=identifier(writeuser),
        readuser=identifier(readuser))
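Both variants simply interpolate quoted identifiers into the set_permissions.sql template. A hedged usage sketch (the database and role names below are the defaults used in the CKAN documentation; adjust them to your installation):

# Print the generated SQL so it can be piped to psql as the postgres superuser.
print(permissions_sql(
    maindb=u'ckan_default',
    datastoredb=u'datastore_default',
    mainuser=u'ckan_default',
    writeuser=u'ckan_default',
    readuser=u'datastore_default'))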
Code example #5
def datastore_run_triggers(context, data_dict):
    ''' update each record with trigger

    The datastore_run_triggers API action allows you to re-apply existing
    triggers to an existing DataStore resource.

    :param resource_id: resource id that the data is going to be stored under.
    :type resource_id: string

    **Results:**

    :returns: The rowcount in the table.
    :rtype: int

    '''
    res_id = data_dict['resource_id']
    p.toolkit.check_access('datastore_run_triggers', context, data_dict)
    backend = DatastoreBackend.get_active_backend()
    connection = backend._get_write_engine().connect()

    sql = sqlalchemy.text(u'''update {0} set _id=_id '''.format(
        identifier(res_id)))
    try:
        results = connection.execute(sql)
    except sqlalchemy.exc.DatabaseError as err:
        message = six.ensure_text(err.args[0].split('\n')[0])
        raise p.toolkit.ValidationError(
            {u'records': [message.split(u') ', 1)[-1]]})
    return results.rowcount
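A hedged call-site sketch for this action via the plugins toolkit (the resource id is a placeholder):

# Re-apply the existing triggers to every row of a DataStore resource;
# the action returns the rowcount of the table.
rowcount = p.toolkit.get_action(u'datastore_run_triggers')(
    {u'ignore_auth': True},
    {u'resource_id': u'my-resource-id'})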
Code example #6
def find_min_and_max_id(new_ids, primary_key, resource_id, connection):
    '''We have to compare exactly the same block of data
    from the database as from the new records.
    E.g. chunk 2 has the entries 501-750, so we have to query the entries
    between 501 and 750 in the database; otherwise the detection would not be exact.'''
    if current_entry_properties.min_id is None or new_ids[0] < current_entry_properties.min_id:
        select = u'''SELECT MIN({primary_key}) FROM {table}'''. \
            format(table=identifier(resource_id), primary_key=identifier(primary_key))
        rs = connection.execute(select)
        current_entry_properties.min_id = rs.first()[0]

    current_entry_properties.max_id = current_entry_properties.min_id + 249
    last_id = new_ids[-1]

    if last_id > current_entry_properties.max_id:
        current_entry_properties.max_id = last_id
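A small worked illustration of the 250-row window this produces, assuming the resource's ids start at 1 (values are illustrative only):

# chunk 1: new_ids[0] = 1,   SELECT MIN -> min_id = 1,   max_id = 1 + 249 = 250
# chunk 2: detect_deleted_rows advanced min_id to 250 + 1 = 251,
#          so max_id = 251 + 249 = 500, matching new_ids 251..500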
Code example #7
File: action.py Project: gsueur/ckan
def datastore_run_triggers(context, data_dict):
    ''' update each record with trigger

    The datastore_run_triggers API action allows you to re-apply existing
    triggers to an existing DataStore resource.

    :param resource_id: resource id that the data is going to be stored under.
    :type resource_id: string

    **Results:**

    :returns: The rowcount in the table.
    :rtype: int

    '''
    res_id = data_dict['resource_id']
    p.toolkit.check_access('datastore_trigger_each_row', context, data_dict)
    backend = DatastoreBackend.get_active_backend()
    connection = backend._get_write_engine().connect()

    sql = sqlalchemy.text(u'''update {0} set _id=_id '''.format(
                          identifier(res_id)))
    try:
        results = connection.execute(sql)
    except sqlalchemy.exc.DatabaseError as err:
        message = err.args[0].split('\n')[0].decode('utf8')
        raise p.toolkit.ValidationError({
                u'records': [message.split(u') ', 1)[-1]]})
    return results.rowcount
Code example #8
def create_history_table(data_dict, engine):
    columns = u", ".join([u'{0} {1}'.format(
        identifier(f['id']), f['type']) for f in data_dict['fields']])

    engine.execute(
        u' CREATE TABLE IF NOT EXISTS "{name}"({columns});'.format(
            name=data_dict['resource_id'],
            columns=columns
        )
    )
Code example #9
def get_old_columns_number(connection, resource_id):
    '''
    To make this generic, the database name should come from the
    CKAN config file, e.g.:
    database_url = config.get('ckan.datastore.write_url', None)
    database_name = database_url.split('@')[1].split('/')'''

    # table_name is compared as a string value, so it needs a single-quoted
    # literal rather than a double-quoted identifier
    query = u'''SELECT COUNT(*) FROM INFORMATION_SCHEMA.COLUMNS
    WHERE table_catalog = 'datastore_default'
    AND table_name = {table_name}'''.format(table_name=literal_string(resource_id))

    return connection.execute(query)
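connection.execute returns a result proxy rather than the count itself, so a caller presumably unwraps it; a hedged usage sketch (the resource id is a placeholder):

# Fetch the actual column count from the result proxy.
rs = get_old_columns_number(connection, u'my-resource-id')
old_columns_number = rs.first()[0]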
Code example #10
def create_op_type_trigger(table, table_history, connection):
    connection.execute(
        u'''CREATE OR REPLACE FUNCTION add_operation_type() RETURNS trigger AS $$
        BEGIN
            IF (TG_OP = 'DELETE') THEN
                UPDATE {table_history} SET op_type = 'DELETE' WHERE upper(sys_period) = (SELECT MAX(upper(sys_period)) FROM {table_history});
            ELSIF (TG_OP = 'UPDATE') THEN
                UPDATE {table_history} SET op_type = 'UPDATE' WHERE upper(sys_period) = (SELECT MAX(upper(sys_period)) FROM {table_history});
            END IF;
            RETURN NULL;
        END;
        $$ 
        LANGUAGE plpgsql;
        CREATE TRIGGER {trigger}
        AFTER INSERT OR UPDATE OR DELETE ON {table}
        FOR EACH ROW EXECUTE PROCEDURE add_operation_type();'''.format(table_history=table_history,
                                                                       trigger=identifier(table + '_trigger'),
                                                                       table=table)
    )
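Together with the versioning trigger from example #2, this marks the superseded versions in the *_history table with an op_type next to their sys_period range. A hedged inspection sketch (the resource id is a placeholder):

# List the recorded versions of a resource, oldest first; lower()/upper() give
# the start and end of each history row's validity period.
history_sql = u'''SELECT op_type,
                         lower(sys_period) AS valid_from,
                         upper(sys_period) AS valid_to
                  FROM {table_history}
                  ORDER BY lower(sys_period)'''.format(
    table_history=identifier(u'my-resource-id' + u'_history'))
rows = connection.execute(history_sql)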
Code example #11
    def create(self, context, data_dict):
        u'''The datacitation extension is only activated if the dataset has
        a unique field; otherwise it proceeds according to the CKAN standard.
        '''

        records = data_dict.get('records', None)

        current_entry_properties.primary_key = find_primary_key(records)

        if current_entry_properties.primary_key is None:
            raise InvalidDataError(
                toolkit._("The data has no unique field!"))
        else:
            if super(VersionedDatastorePostgresqlBackend, self).resource_exists(data_dict['resource_id']):
                # The CKAN DataPusher pushes the entries in chunks of 250.
                # Because of that, after the first 250 entries have been pushed
                # the table already exists, so an existing table does not
                # automatically indicate an update. An additional manual check
                # is needed to distinguish between UPDATE and CREATE.
                # It would be better if this were determined at the UI level.

                if not records:
                    return

                detect_dict = detect_deleted_rows(data_dict, self.connection, records,
                                                  current_entry_properties.primary_key)

                if detect_dict.get('mode', None) == 'create':
                    return super(VersionedDatastorePostgresqlBackend, self).create(context, data_dict)

                old_record = detect_dict['old_record']
                record_after_delete = detect_dict['new_record']
                old_ids = detect_dict['old_ids']

                # there are also other checks to do
                # TODO check if all fields name are the same if updating dataset
                # TODO check if the number of columns is equal

                data_dict['method'] = 'update'
                data_dict['primary_key'] = current_entry_properties.primary_key

                updated_rows = detect_updated_rows(record_after_delete, old_record,
                                                   current_entry_properties.primary_key)

                insert_data = detect_inserted_rows(record_after_delete, old_ids, current_entry_properties.primary_key)

                data_dict['records'] = updated_rows

                super(VersionedDatastorePostgresqlBackend, self).upsert(context, data_dict)

                data_dict['method'] = 'insert'
                data_dict['records'] = insert_data

                return super(VersionedDatastorePostgresqlBackend, self).upsert(context, data_dict)
            else:
                fields = data_dict.get('fields', None)
                records = data_dict.get('records', None)
                fields.append(
                    {
                        "id": "sys_period",
                        "type": "tstzrange"
                    }
                )
                if records is not None:
                    for r in records:
                        r['sys_period'] = DateTimeTZRange(datetime.now(), None)

                data_dict['primary_key'] = current_entry_properties.primary_key
                data_dict['fields'] = fields
                data_dict['records'] = records
                datastore_fields = [
                    {'id': '_id', 'type': 'integer'},
                    {'id': '_full_text', 'type': 'tsvector'},
                ]
                extra_field = [
                    {
                        "id": "op_type",
                        "type": "text"
                    }
                ]
                fields_of_history_table = datastore_fields + list(fields) + extra_field
                history_data_dict = {
                    "fields": fields_of_history_table,
                    "resource_id": data_dict['resource_id'] + '_history'
                }
                create_history_table(history_data_dict, self.engine)
                result = super(VersionedDatastorePostgresqlBackend, self).create(context, data_dict)
                create_versioning_trigger(data_dict, self.connection)
                create_op_type_trigger(identifier(data_dict['resource_id']),
                                       identifier(data_dict['resource_id'] + '_history'), self.connection)

                return result
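The helpers find_primary_key, detect_updated_rows and detect_inserted_rows are not shown in this excerpt. Judging only from how they are called here, the insert detection could look roughly like the following sketch (a guess for illustration, not the project's actual implementation):

def detect_inserted_rows(new_record, old_ids, primary_key):
    # Sketch: rows whose primary key is not among the ids already stored in
    # the datastore table are treated as newly inserted rows.
    return [row for row in new_record
            if int(row.get(primary_key)) not in old_ids]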
Code example #12
def delete_deleted_rows(rows_to_delete, connection, data_dict, primary_key):
    for id in rows_to_delete:
        delete_sql = u'''DELETE FROM {table} WHERE {primary_key}={id}'''.format(
            table=identifier(data_dict['resource_id']), primary_key=identifier(primary_key), id=id)
        connection.execute(delete_sql)
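Interpolating the id into the DELETE statement works here because the ids come out of the database as integers; a sketch of the same loop with a bound parameter (assuming sqlalchemy is available, as in the other examples) avoids relying on that:

import sqlalchemy


def delete_deleted_rows(rows_to_delete, connection, data_dict, primary_key):
    # Same behaviour as above, but the id is passed as a bound parameter
    # instead of being formatted into the SQL string.
    delete_sql = sqlalchemy.text(
        u'DELETE FROM {table} WHERE {primary_key} = :id'.format(
            table=identifier(data_dict['resource_id']),
            primary_key=identifier(primary_key)))
    for row_id in rows_to_delete:
        connection.execute(delete_sql, id=row_id)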
Code example #13
def create_table_knowledgehub(context, data_dict):
    '''Creates table, columns and column info (stored as comments).

    :param resource_id: The resource ID (i.e. postgres table name)
    :type resource_id: string
    :param fields: details of each field/column, each with properties:
        id - field/column name
        type - optional, otherwise it is guessed from the first record
        info - some field/column properties, saved as a JSON string in postgres
            as a column comment. e.g. "type_override", "label", "notes"
    :type fields: list of dicts
    :param records: records, of which the first is used when a field type needs
        guessing.
    :type records: list of dicts
    '''

    datastore_fields = [
        {
            'id': '_id',
            'type': 'serial primary key'
        },
        {
            'id': '_full_text',
            'type': 'tsvector'
        },
    ]

    # check first row of data for additional fields
    extra_fields = []
    supplied_fields = data_dict.get('fields', [])
    check_fields(context, supplied_fields)
    field_ids = _pluck('id', supplied_fields)
    records = data_dict.get('records')

    fields_errors = []

    for field_id in field_ids:
        # Postgres has a limit of 63 characters for a column name
        if len(field_id) > 63:
            raise ValidationError({
                'field': [
                    'Column heading exceeds limit of '
                    '63 characters. "{0}" '.format(field_id)
                ]
            })

    if fields_errors:
        raise ValidationError({'fields': fields_errors})
    # if a field's type is not given, try to guess it or throw an error
    for field in supplied_fields:
        if 'type' not in field:
            if not records or field['id'] not in records[0]:
                raise ValidationError({
                    'fields':
                    [u'"{0}" type not guessable'.format(field['id'])]
                })
            field['type'] = _guess_type(records[0][field['id']])

    # Check for duplicate fields
    unique_fields = set([f['id'] for f in supplied_fields])
    if not len(unique_fields) == len(supplied_fields):
        all_duplicates = set()
        field_ids = _pluck('id', supplied_fields)
        for field_id in field_ids:
            if field_ids.count(field_id) > 1:
                all_duplicates.add(field_id)
        string_fields = ", ".join(all_duplicates)
        raise ValidationError({
            'field': [
                'Duplicate column names are not supported! '
                'Duplicate columns are : "{0}" '.format(string_fields)
            ]
        })

    if records:
        # check record for sanity
        if not isinstance(records[0], dict):
            raise ValidationError(
                {'records': ['The first row is not a json object']})
        supplied_field_ids = records[0].keys()
        for field_id in supplied_field_ids:
            if field_id not in field_ids:
                extra_fields.append({
                    'id': field_id,
                    'type': _guess_type(records[0][field_id])
                })

    fields = datastore_fields + supplied_fields + extra_fields
    sql_fields = u", ".join(
        [u'{0} {1}'.format(identifier(f['id']), f['type']) for f in fields])

    sql_string = u'CREATE TABLE {0} ({1});'.format(
        identifier(data_dict['resource_id']), sql_fields)

    info_sql = []
    for f in supplied_fields:
        info = f.get(u'info')
        if isinstance(info, dict):
            info_sql.append(u'COMMENT ON COLUMN {0}.{1} is {2}'.format(
                identifier(data_dict['resource_id']), identifier(f['id']),
                literal_string(json.dumps(info, ensure_ascii=False))))

    context['connection'].execute(
        (sql_string + u';'.join(info_sql)).replace(u'%', u'%%'))
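A hedged sketch of the kind of data_dict this action expects (resource id, field names and values are placeholders):

data_dict = {
    'resource_id': 'my-resource-id',
    'fields': [
        {'id': 'city', 'type': 'text',
         'info': {'label': 'City', 'notes': 'Name of the city'}},
        {'id': 'population'},  # no type given, guessed from the first record
    ],
    'records': [
        {'city': 'Vienna', 'population': 1897000},
    ],
}
# create_table_knowledgehub(context, data_dict) would then create the table
# with _id/_full_text plus the supplied columns, guess the type of
# "population", and store the "city" column info as a column comment.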