Example #1
def process_nonpatient_table(table: Table, engine: Engine,
                             progargs: Any) -> None:
    if progargs.rcep:
        return
    log.info("Preprocessing non-patient table {}".format(repr(table.name)))
    pk_col = get_effective_int_pk_col(table)
    other_pk_col = pk_col if pk_col != CRATE_COL_PK else None
    if other_pk_col:  # table has a primary key already
        crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
    else:
        crate_pk_col = make_bigint_autoincrement_column(
            CRATE_COL_PK, engine.dialect)
    table.append_column(crate_pk_col)  # SQL Server requires Table-bound columns to generate DDL
    add_columns(engine, table, [crate_pk_col])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[CRATE_COL_PK])
    if other_pk_col:
        execute(
            engine, """
            UPDATE {tablename} SET {crate_pk} = {rio_pk}
            WHERE {crate_pk} IS NULL
        """.format(tablename=table.name,
                   crate_pk=CRATE_COL_PK,
                   rio_pk=other_pk_col))
    add_indexes(engine, table, [{
        'index_name': CRATE_IDX_PK,
        'column': CRATE_COL_PK,
        'unique': True
    }])
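The helper make_bigint_autoincrement_column() is used in several of these examples but never defined. Going by the unit test in Example #24, which expects the SQL Server DDL "you BIGINT NOT NULL IDENTITY(1,1)", a minimal sketch using SQLAlchemy 1.4+'s Identity construct could look like this (the real helper may build its DDL differently):

from sqlalchemy import BigInteger, Column, Identity

def make_bigint_autoincrement_column(column_name, dialect):
    # On SQL Server (mssql), Identity renders as IDENTITY(1,1); on other
    # dialects, fall back to the dialect's native autoincrement handling.
    if dialect.name == 'mssql':
        return Column(column_name, BigInteger,
                      Identity(start=1, increment=1), nullable=False)
    return Column(column_name, BigInteger, autoincrement=True)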
Example #2
def process_master_patient_table(table: Table, engine: Engine,
                                 progargs: Any) -> None:
    crate_col_nhs_number = Column(CRATE_COL_NHS_NUMBER,
                                  BigInteger,
                                  nullable=True)
    table.append_column(crate_col_nhs_number)
    add_columns(engine, table, [crate_col_nhs_number])
    if progargs.rcep:
        nhscol = RCEP_COL_NHS_NUMBER
    else:
        nhscol = RIO_COL_NHS_NUMBER
    log.info("Table {}: updating column {}".format(repr(table.name),
                                                   repr(nhscol)))
    ensure_columns_present(engine, tablename=table.name, column_names=[nhscol])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[CRATE_COL_NHS_NUMBER])
    execute(
        engine, """
        UPDATE {tablename} SET
            {nhs_number_int} = CAST({nhscol} AS BIGINT)
            WHERE {nhs_number_int} IS NULL
    """.format(
            tablename=table.name,
            nhs_number_int=CRATE_COL_NHS_NUMBER,
            nhscol=nhscol,
        ))
Example #3
def create_view(name: str,
                selectable: FromClause,
                metadata: MetaData,
                materialized: bool = False) -> Table:
    """
    Args:
        name            => name of the view to create
        selectable      => query to create view as
        metadata        => metadata to listen for events on
        materialized    => whether to create standard or materialized view
    Returns:
        Table object bound to temporary MetaData object with columns
        returned from selectable (essentially creates table as view).
        NOTE:
            For non-postgresql backends, creating a materialized view
            will result in a standard view, which cannot be indexed.
    Preconditions:
        N/A
    Raises:
        N/A
    """
    _tmp_mt = MetaData()
    tbl = Table(name, _tmp_mt)
    for column in selectable.c:
        tbl.append_column(
            Column(column.name, column.type, primary_key=column.primary_key))
    listen(metadata, "after_create",
           (CreateMaterializedViewExpression(name, selectable)
            if materialized else CreateViewExpression(name, selectable)))
    listen(
        metadata, "before_drop",
        DropMaterializedViewExpression(name)
        if materialized else DropViewExpression(name))
    return tbl
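Because create_view() attaches the view DDL to the metadata's after_create and before_drop events, the view is created and dropped together with the tables by metadata.create_all() / drop_all(). A usage sketch, assuming a hypothetical users table and SQLAlchemy 1.4-style select() (on 2.0, iterating selectable.c may require passing a subquery instead):

from sqlalchemy import Column, Integer, MetaData, Table, create_engine, select

engine = create_engine('sqlite://')   # hypothetical in-memory engine
metadata = MetaData()
users = Table('users', metadata,
              Column('id', Integer, primary_key=True),
              Column('age', Integer))

# Register the view on the same MetaData the tables live in:
adults = create_view('adults', select(users).where(users.c.age >= 18),
                     metadata)

metadata.create_all(engine)  # emits CREATE TABLE users, then CREATE VIEW adults
# adults is an ordinary Table object and can be queried like any table.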
Example #4
    def _exclude_columns_table(table):
        new_table = Table(table.name, MetaData())

        for c in table.columns:
            if c.name not in columns:
                new_table.append_column(c.copy())
        return new_table
Example #5
def get_sa_new_table(metadata,
                     table_name,
                     new_table_name,
                     smallest_int_types=False):
    'create a new table definition with columns copied from table'
    assert metadata.is_bound(), 'Metadata is not bound'
    table = Table(table_name, metadata, autoload=True)

    # get smallest int types for all data in the table
    if smallest_int_types:
        min_max_types = get_sa_table_int_min_max(table)
        if min_max_types:
            smallest_int_types = {}
            for presto_col, (min_val, max_val) in min_max_types.items():
                smallest_int_type = get_presto_smallest_int_type_min_max(
                    min_val, max_val)
                smallest_int_types.update({presto_col: smallest_int_type})

    new_table = Table(new_table_name, metadata)
    for column in table.columns:
        if smallest_int_types:
            smallest_int_type = smallest_int_types.get(column.name, None)
            if smallest_int_type:
                new_table.append_column(
                    sa.Column(column.name, smallest_int_type()))
                continue
        new_table.append_column(sa.Column(column.name, column.type))
    return new_table
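A hypothetical call, mirroring an events table into events_v2 while narrowing integer columns to the smallest Presto integer type that holds the observed min/max values:

new_table = get_sa_new_table(metadata, 'events', 'events_v2',
                             smallest_int_types=True)
new_table.create()  # metadata is bound, so no explicit engine is needed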
Example #6
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column
        unless specified via optional parameter primary_id, which will be used
        as the primary key of the table. Automatic id is set to be an
        auto-incrementing integer, while the type of custom primary_id can be a
        String or an Integer as specified with primary_type flag. The default
        length of String is 255. The caller can specify the length.
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='String')
            # custom length of String
            table4 = db.create_table('population4', primary_id='race', primary_type='String(50)')
        """
        table_name = self._valid_table_name(table_name)
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
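            # The regex accepts 'Integer', 'String', or 'String(N)';
            # group(1) captures 'Integer' and group(3) the optional
            # '(N)' length suffix used below.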
            match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
            if match:
                if match.group(1) == 'Integer':
                    auto_flag = False
                    if primary_id == 'id':
                        auto_flag = True
                    col = Column(primary_id,
                                 Integer,
                                 primary_key=True,
                                 autoincrement=auto_flag)
                elif not match.group(3):
                    col = Column(primary_id, String(255), primary_key=True)
                else:
                    len_string = int(match.group(3)[1:-1])
                    len_string = min(len_string, 255)
                    col = Column(primary_id,
                                 String(len_string),
                                 primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'String'.")

            table = SQLATable(table_name, self.metadata, schema=self.schema)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #7
		def wrap(fn):
			table_definition = TableDefinition()
			fn(table_definition)
			table = Table(name, self.meta)
			for attrname in table_definition.fields.keys():
				args, kw = table_definition.fields[attrname]
				table.append_column(Column(attrname, *args, **kw))
			table.create()
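The fields mapping evidently stores an (args, kwargs) tuple per column name. A hypothetical use of the decorator, assuming the enclosing factory (not shown) binds name and is exposed as, say, db.table():

from sqlalchemy import Integer, String

@db.table('users')
def users(t):
    t.fields['id'] = ((Integer,), dict(primary_key=True))
    t.fields['email'] = ((String(255),), dict(nullable=False))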
Example #8
def create_table(engine, table_name):
    log.debug("Creating table: %s on %r" % (table_name, engine))
    table = Table(table_name, engine._metadata)
    col = Column('id', Integer, primary_key=True)
    table.append_column(col)
    table.create(engine)
    TABLES[engine][table_name] = table
    return table
Example #9
 def _create_table(self, table_name):
     table_name = validate_name(table_name)
     log.debug("Creating table: %s on %r" % (table_name, self.engine))
     table = Table(table_name, self.meta)
     col = Column(ID_COLUMN, Integer, primary_key=True)
     table.append_column(col)
     table.create(self.engine)
     return table
Example #10
 def _create_table(self, table_name):
     table_name = validate_name(table_name)
     log.debug("Creating table: %s on %r" % (table_name, self.engine))
     table = Table(table_name, self.meta)
     col = Column(ID_COLUMN, Integer, primary_key=True)
     table.append_column(col)
     table.create(self.engine)
     return table
Example #11
        def _table(table):
            src_table = table
            new_table = Table(src_table.name, MetaData())

            for c in src_table.columns:
                if c.name not in columns:
                    new_table.append_column(c.copy())
            return new_table
Example #12
def copy_table(table):
    """
     渡されたテーブルをコピーします
    """
    ret_table = Table(table.name, MetaData())
    for c in table.columns:
        ret_table.append_column(copy_column(c))

    return ret_table
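copy_column() is referenced above but not shown. A minimal stand-in that re-creates the column (a Column object can only belong to one Table, and Column.copy() is deprecated since SQLAlchemy 1.4):

from sqlalchemy import Column

def copy_column(column):
    # Preserve name, type and the most common flags; extend as needed.
    return Column(column.name, column.type,
                  primary_key=column.primary_key,
                  nullable=column.nullable)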
Example #13
def create_table(engine, table_name):
    with lock:
        log.debug("Creating table: %s on %r" % (table_name, engine))
        table = Table(table_name, engine._metadata)
        col = Column('id', Integer, primary_key=True)
        table.append_column(col)
        table.create(engine)
        engine._tables[table_name] = table
        return table
Example #14
def create_table(engine, table_name):
    with lock:
        log.debug("Creating table: %s on %r" % (table_name, engine))
        table = Table(table_name, engine._metadata)
        col = Column("id", Integer, primary_key=True)
        table.append_column(col)
        table.create(engine)
        engine._tables[table_name] = table
        return table
Example #15
    def _rename_columns_table(table):
        new_table = Table(table.name, MetaData())

        for c in table.columns:
            renamed_column = c.copy()
            if c.name in src_columns:
                renamed_column.name = maps[c.name]
            new_table.append_column(renamed_column)

        return new_table
Example #16
	def wrap(fn):
		table_definition = OrderedProperties()
		fn(table_definition)
		table = Table(table_name, g.db_meta)
		for attrname in table_definition.keys():
			value = table_definition[attrname]
			if isinstance(value, Column):
				table.append_column(value)
			elif isinstance(value, Constraint):
				table.append_constraint(value)
		table.create(g.db_engine)
Example #17
    def create_table(self, table_name, primary_id='id', primary_type='Integer'):
        """
        Create a new table.

        The new table will automatically have an `id` column unless specified via
        optional parameter primary_id, which will be used as the primary key of the
        table. Automatic id is set to be an auto-incrementing integer, while the
        type of custom primary_id can be a
        String or an Integer as specified with primary_type flag. The default
        length of String is 255. The caller can specify the length.
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='String')
            # custom length of String
            table4 = db.create_table('population4', primary_id='race', primary_type='String(50)')
        """
        table_name = self._valid_table_name(table_name)
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
            if match:
                if match.group(1) == 'Integer':
                    auto_flag = False
                    if primary_id == 'id':
                        auto_flag = True
                    col = Column(primary_id, Integer, primary_key=True, autoincrement=auto_flag)
                elif not match.group(3):
                    col = Column(primary_id, String(255), primary_key=True)
                else:
                    len_string = int(match.group(3)[1:-1])
                    len_string = min(len_string, 255)
                    col = Column(primary_id, String(len_string), primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'String'.")

            table = SQLATable(table_name, self.metadata, schema=self.schema)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #18
        def _table(table):
            left_table = table
            right_table = tables[right_table_name]

            new_table = Table(left_table.name, MetaData())

            for c in left_table.columns:
                new_table.append_column(_copy_column(c))

            for c in right_table.columns:
                new_table.append_column(_copy_column(c))

            return new_table
Example #19
class TableHandler(object):

    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """

    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c)
                     for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
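One caveat about _upsert(): the UPDATE-then-INSERT sequence is not atomic, so two concurrent writers can both see rowcount == 0 and insert duplicates. Where that matters, a native upsert is safer; a PostgreSQL-only sketch (table, data and unique_columns as above, and a unique constraint on those columns is required):

from sqlalchemy.dialects.postgresql import insert

stmt = insert(table).values(**data)
stmt = stmt.on_conflict_do_update(index_elements=unique_columns, set_=data)
bind.execute(stmt)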
Example #20
def process_table(table: Table, engine: Engine,
                  configoptions: PcmisConfigOptions) -> None:
    """
    Processes a PCMIS table by checking it has appropriate columns, perhaps
    adding a CRATE integer PK, and indexing it.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        configoptions: an instance of :class:`PcmisConfigOptions`
    """
    tablename = table.name
    column_names = table.columns.keys()
    log.debug(f"TABLE: {tablename}; COLUMNS: {column_names}")

    existing_pk_cols = get_pk_colnames(table)
    assert len(existing_pk_cols) < 2, (
        f"Table {tablename} has >1 PK column; don't know what to do")
    if existing_pk_cols and not get_effective_int_pk_col(table):
        raise ValueError(f"Table {table!r} has a non-integer PK")
    adding_crate_pk = not existing_pk_cols

    required_cols = [CRATE_COL_PK] if not configoptions.print_sql_only else []

    if configoptions.drop_not_create:
        # ---------------------------------------------------------------------
        # DROP STUFF! Opposite order to creation (below)
        # ---------------------------------------------------------------------
        drop_indexes(engine, table, [CRATE_IDX_PK])
        drop_columns(engine, table, [CRATE_COL_PK])
    else:
        # ---------------------------------------------------------------------
        # CREATE STUFF!
        # ---------------------------------------------------------------------
        # SQL Server requires Table-bound columns in order to generate DDL:
        if adding_crate_pk:
            crate_pk_col = make_bigint_autoincrement_column(
                CRATE_COL_PK, engine.dialect)
            table.append_column(crate_pk_col)
            add_columns(engine, table, [crate_pk_col])
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=required_cols)
        add_indexes(engine, table, [{
            'index_name': CRATE_IDX_PK,
            'column': CRATE_COL_PK,
            'unique': True
        }])
Example #21
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column 
        unless specified via optional parameter primary_id, which will be used 
        as the primary key of the table. Automatic id is set to be an 
        auto-incrementing integer, while the type of custom primary_id can be a 
        Text or an Integer as specified with primary_type flag. 
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call. 

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='Text')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            if primary_type == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id,
                             Integer,
                             primary_key=True,
                             autoincrement=auto_flag)
            elif primary_type == 'Text':
                col = Column(primary_id, Text, primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'Text'.")

            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #22
class TableHandler(object):
    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """
    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c) for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
Example #23
    def create_table(self, table_name):
        """
        Creates a new table. The new table will automatically have an `id` column, which is
        set to be an auto-incrementing integer as the primary key of the table.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')
        """
        with self.lock:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            col = Column('id', Integer, primary_key=True)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
Example #24
def unit_tests() -> None:
    from sqlalchemy.dialects.mssql.base import MSDialect
    from sqlalchemy.dialects.mysql.base import MySQLDialect
    d_mssql = MSDialect()
    d_mysql = MySQLDialect()
    col1 = Column('hello', BigInteger, nullable=True)
    col2 = Column('world', BigInteger,
                  autoincrement=True)  # does NOT generate IDENTITY
    col3 = make_bigint_autoincrement_column('you', d_mssql)
    metadata = MetaData()
    t = Table('mytable', metadata)
    t.append_column(col1)
    t.append_column(col2)
    t.append_column(col3)

    print("Checking Column -> DDL: SQL Server (mssql)")
    test_assert(column_creation_ddl(col1, d_mssql), "hello BIGINT NULL")
    test_assert(column_creation_ddl(col2, d_mssql), "world BIGINT NULL")
    test_assert(column_creation_ddl(col3, d_mssql),
                "you BIGINT NOT NULL IDENTITY(1,1)")

    print("Checking Column -> DDL: MySQL (mysql)")
    test_assert(column_creation_ddl(col1, d_mysql), "hello BIGINT")
    test_assert(column_creation_ddl(col2, d_mysql), "world BIGINT")
    # not col3; unsupported

    print("Checking SQL type -> SQL Alchemy type")
    to_check = [
        # mssql
        ("BIGINT", d_mssql),
        ("NVARCHAR(32)", d_mssql),
        ("NVARCHAR(MAX)", d_mssql),
        ('NVARCHAR(160) COLLATE "Latin1_General_CI_AS"', d_mssql),
        # mysql
        ("BIGINT", d_mssql),
        ("LONGTEXT", d_mysql),
    ]
    for coltype, dialect in to_check:
        print("... {} -> dialect {} -> {}".format(
            repr(coltype),
            repr(dialect.name),
            repr(get_sqla_coltype_from_dialect_str(coltype, dialect))))
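test_assert() is referenced but not defined in this snippet; a minimal sketch:

def test_assert(actual, expected):
    # Hypothetical helper: compare and report both values on mismatch.
    assert actual == expected, "got {!r}, expected {!r}".format(actual, expected)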
Example #25
    def create_table(self, table_name, primary_id='id', primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column 
        unless specified via optional parameter primary_id, which will be used 
        as the primary key of the table. Automatic id is set to be an 
        auto-incrementing integer, while the type of custom primary_id can be a 
        Text or an Integer as specified with primary_type flag. 
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call. 

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='Text')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            if primary_type == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id, Integer, primary_key=True, autoincrement=auto_flag)
            elif primary_type == 'Text':
                col = Column(primary_id, Text, primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'Text'.")

            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #26
    def create_table(self, table_name):
        """
        Creates a new table. The new table will automatically have an `id` column, which is
        set to be an auto-incrementing integer as the primary key of the table.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            col = Column('id', Integer, primary_key=True)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #27
def create_view(name, selectable, metadata, materialized=False):
    '''
    Args:
        name: String            => name of the view to create
        selectable: FromClause  => query to create view as
        metadata: MetaData      => metadata to listen for events on
        materialized: Boolean   => whether to create standard or materialized view
    Returns:
        Table
        Table object bound to temporary MetaData object with columns as
        columns returned from selectable (essentially creates table as view)
        NOTE:
            For non-postgresql backends, creating a materialized view
            will result in a standard view, which cannot be indexed
    Preconditions:
        name is of type String
        selectable is of type FromClause
        metadata is of type Metadata
        materialized is of type Boolean
    '''
    assert isinstance(name, str), 'Name is not of type String'
    assert isinstance(selectable,
                      FromClause), 'Selectable is not of type FromClause'
    assert isinstance(metadata, MetaData), 'Metadata is not of type MetaData'
    assert isinstance(materialized,
                      bool), 'Materialized is not of type Boolean'
    _tmp_mt = MetaData()
    tbl = Table(name, _tmp_mt)
    for c in selectable.c:
        tbl.append_column(Column(c.name, c.type, primary_key=c.primary_key))
    listen(
        metadata, 'after_create',
        CreateMaterializedViewExpression(name, selectable)
        if materialized else CreateViewExpression(name, selectable))
    listen(
        metadata, 'before_drop',
        DropMaterializedViewExpression(name)
        if materialized else DropViewExpression(name))
    return tbl
Example #28
def process_table(table: Table, engine: Engine, progargs: Any) -> None:
    tablename = table.name
    column_names = table.columns.keys()
    log.debug("TABLE: {}; COLUMNS: {}".format(tablename, column_names))

    existing_pk_cols = get_pk_colnames(table)
    assert len(existing_pk_cols) < 2, (
        "Table {} has >1 PK column; don't know what to do".format(tablename))
    if existing_pk_cols and not get_effective_int_pk_col(table):
        raise ValueError("Table {} has a non-integer PK".format(repr(table)))
    adding_crate_pk = not existing_pk_cols

    required_cols = [CRATE_COL_PK] if not progargs.print else []

    if progargs.drop_danger_drop:
        # ---------------------------------------------------------------------
        # DROP STUFF! Opposite order to creation (below)
        # ---------------------------------------------------------------------
        drop_indexes(engine, table, [CRATE_IDX_PK])
        drop_columns(engine, table, [CRATE_COL_PK])
    else:
        # ---------------------------------------------------------------------
        # CREATE STUFF!
        # ---------------------------------------------------------------------
        # SQL Server requires Table-bound columns in order to generate DDL:
        if adding_crate_pk:
            crate_pk_col = make_bigint_autoincrement_column(
                CRATE_COL_PK, engine.dialect)
            table.append_column(crate_pk_col)
            add_columns(engine, table, [crate_pk_col])
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=required_cols)
        add_indexes(engine, table, [{
            'index_name': CRATE_IDX_PK,
            'column': CRATE_COL_PK,
            'unique': True
        }])
Example #29
 def linktab(self):
     if not hasattr(self, '_linktab'):
         if self.engine.has_table(self.linktab_name):
             self._linktab = Table(self.linktab_name, self.meta,
                                   autoload=True)
         else:
             table = Table(self.linktab_name, self.meta)
             col = Column('view', Unicode, index=True)
             table.append_column(col)
             col = Column('serial', Unicode(40))
             table.append_column(col)
             col = Column('key', Unicode, index=True)
             table.append_column(col)
             col = Column('fingerprint', Unicode(255), index=True)
             table.append_column(col)
             table.create(self.engine)
             self._linktab = table
     return self._linktab
Example #30
    def create_table(self, schema_name, table_name, columns_configuration,
                     drop_first):
        metadata = MetaData()

        table = Table(table_name, metadata, schema=schema_name)

        for column_configuration in columns_configuration:
            table.append_column(
                self.create_column(column_configuration["destination"]))

        table.append_column(
            Column(
                Providers.AuditColumnsNames.TIMESTAMP,
                DateTime(timezone=True),
                server_default=func.now(),
            ))

        table.append_column(
            Column(
                Providers.AuditColumnsNames.IS_DELETED,
                Boolean,
                server_default="f",
                default=False,
            ))

        table.append_column(
            Column(Providers.AuditColumnsNames.CHANGE_VERSION, BigInteger))

        if drop_first:
            self.logger.debug(f"Dropping table {schema_name}.{table_name}")
            table.drop(self.target_db, checkfirst=True)
            self.logger.debug(f"Dropped table {schema_name}.{table_name}")

        self.logger.debug(f"Creating table {schema_name}.{table_name}")
        table.create(self.target_db, checkfirst=False)
        self.logger.debug(f"Created table {schema_name}.{table_name}")

        return
Example #31
class FactTable(object):
    """ The ``FactTable`` serves as a controller object for
    a given ``Model``, handling the creation, filling and migration
    of the table schema associated with the dataset. """
    def __init__(self, dataset):
        self.dataset = dataset
        self.bind = db.engine
        self.table_name = '%s__facts' % dataset.name
        self.meta = MetaData()
        self.meta.bind = self.bind
        self._table = None

    @property
    def table(self):
        """ Generate an appropriate table representation to mirror the
        fields known for this table. """
        if self._table is None:
            self._table = Table(self.table_name, self.meta)
            id_col = Column('_id', Unicode(42), primary_key=True)
            self._table.append_column(id_col)
            json_col = Column('_json', Unicode())
            self._table.append_column(json_col)
            self._fields_columns(self._table)
        return self._table

    @property
    def alias(self):
        """ An alias used for queries. """
        if not hasattr(self, '_alias'):
            self._alias = self.table.alias('entry')
        return self._alias

    @property
    def mapping(self):
        if not hasattr(self, '_mapping'):
            self._mapping = {}
            for attribute in self.dataset.model.attributes:
                if attribute.column in self.alias.columns:
                    col = self.alias.c[attribute.column]
                    self._mapping[attribute.path] = col
        return self._mapping

    @property
    def exists(self):
        return db.engine.has_table(self.table.name)

    def _fields_columns(self, table):
        """ Transform the (auto-detected) fields into a set of column
        specifications. """
        for field in self.dataset.fields:
            data_type = TYPES.get(field.get('type'), Unicode)
            col = Column(field.get('name'), data_type, nullable=True)
            table.append_column(col)

    def load_iter(self, iterable, chunk_size=1000):
        """ Bulk load all the data in an artifact to a matching database
        table. """
        chunk = []

        conn = self.bind.connect()
        tx = conn.begin()
        try:
            for i, record in enumerate(iterable):
                record['_id'] = i
                record['_json'] = json.dumps(record, default=json_default)
                chunk.append(record)
                if len(chunk) >= chunk_size:
                    stmt = self.table.insert()
                    conn.execute(stmt, chunk)
                    chunk = []

            if len(chunk):
                stmt = self.table.insert()
                conn.execute(stmt, chunk)
            tx.commit()
        except:
            tx.rollback()
            raise

    def create(self):
        """ Create the fact table if it does not exist. """
        if not self.exists:
            self.table.create(self.bind)

    def drop(self):
        """ Drop the fact table if it does exist. """
        if self.exists:
            self.table.drop()
        self._table = None

    def __repr__(self):
        return "<FactTable(%r)>" % (self.dataset)
Example #32
def gen_sqla_info(cls, cls_bases=()):
    """Return SQLAlchemy table object corresponding to the passed Spyne object.
    Also maps given class to the returned table.
    """

    metadata = cls.Attributes.sqla_metadata
    table_name = cls.Attributes.table_name

    inc = [] # include_properties

    # check inheritance
    inheritance = None
    base_class = getattr(cls, '__extends__', None)
    if base_class is None:
        for b in cls_bases:
            if getattr(b, '_type_info', None) is not None and b.__mixin__:
                base_class = b

    if base_class is not None:
        base_table_name = base_class.Attributes.table_name
        if base_table_name is not None:
            if base_table_name == table_name:
                inheritance = _SINGLE
            else:
                inheritance = _JOINED
                raise NotImplementedError("Joined table inheritance is not yet "
                                          "implemented.")
            inc_prop = base_class.Attributes.sqla_mapper.include_properties
            if inc_prop is not None:
                inc.extend(inc_prop)

            exc_prop = base_class.Attributes.sqla_mapper.exclude_properties
            if exc_prop is not None:
                inc = [_p for _p in inc if not _p in exc_prop]

    # check whether the object already has a table
    table = None
    if table_name in metadata.tables:
        table = metadata.tables[table_name]
    else:
        # We need FakeTable because table_args can contain all sorts of stuff
        # that can require a fully-constructed table, and we don't have that
        # information here yet.
        table = _FakeTable()

    # check whether the base classes are already mapped
    base_mapper = None
    if base_class is not None:
        base_mapper = base_class.Attributes.sqla_mapper

    if base_mapper is None:
        for b in cls_bases:
            bm = _mapper_registry.get(b, None)
            if bm is not None:
                assert base_mapper is None, "There can be only one base mapper."
                base_mapper = bm
                inheritance = _SINGLE

    props = {}

    # For each Spyne field
    for k, v in cls._type_info.items():
        if v.Attributes.exc_table:
            continue

        col_args, col_kwargs = sanitize_args(v.Attributes.sqla_column_args)
        _sp_attrs_to_sqla_constraints(cls, v, col_kwargs)

        t = get_sqlalchemy_type(v)

        if t is None:
            p = getattr(v.Attributes, 'store_as', None)
            if p is not None and issubclass(v, Array) and isinstance(p, c_table):
                child_cust, = v._type_info.values()
                if child_cust.__orig__ is not None:
                    child = child_cust.__orig__
                else:
                    child = child_cust

                if p.multi != False: # many to many
                    col_own, col_child = _get_cols_m2m(cls, k, v, p.left, p.right)

                    p.left = col_own.key
                    p.right = col_child.key

                    if p.multi == True:
                        rel_table_name = '_'.join([cls.Attributes.table_name, k])
                    else:
                        rel_table_name = p.multi

                    # FIXME: Handle the case where the table already exists.
                    rel_t = Table(rel_table_name, metadata,
                                                          *(col_own, col_child))

                    props[k] = relationship(child, secondary=rel_t,
                                                              backref=p.backref)

                elif issubclass(child, SimpleModel): # one to many simple type
                    # get left (fk) column info
                    _gen_col = _get_col_o2m(cls, p.left)
                    col_info = _gen_col.next() # gets the column name
                    p.left, child_left_col_type = col_info[0] # FIXME: Add support for multi-column primary keys.
                    child_left_col_name = p.left

                    # get right(data) column info
                    child_right_col_type = get_sqlalchemy_type(child_cust)
                    child_right_col_name = p.right # this is the data column
                    if child_right_col_name is None:
                        child_right_col_name = k

                    # get table name
                    child_table_name = child_cust.Attributes.table_name
                    if child_table_name is None:
                        child_table_name = '_'.join([table_name, k])

                    if child_table_name in metadata.tables:
                        # table exists, get relevant info
                        child_t = metadata.tables[child_table_name]
                        assert child_right_col_type is \
                               child_t.c[child_right_col_name].type.__class__
                        assert child_left_col_type is \
                               child_t.c[child_left_col_name].type.__class__

                        child_right_col = child_t.c[child_right_col_name]
                        child_left_col = child_t.c[child_left_col_name]

                    else:
                        # table does not exist, generate table
                        child_right_col = Column(child_right_col_name,
                                                        child_right_col_type)
                        _sp_attrs_to_sqla_constraints(cls, child_cust,
                                                            col=child_right_col)

                        child_left_col = _gen_col.next()
                        _sp_attrs_to_sqla_constraints(cls, child_cust,
                                                            col=child_left_col)

                        child_t = Table(child_table_name , metadata,
                            Column('id', sqlalchemy.Integer, primary_key=True),
                                                child_left_col, child_right_col)

                    # generate temporary class for association proxy
                    cls_name = ''.join(x.capitalize() or '_' for x in
                                                    child_table_name.split('_'))
                                            # generates camelcase class name.

                    def _i(self, *args):
                        setattr(self, child_right_col_name, args[0])

                    cls_ = type("_" + cls_name, (object,), {'__init__': _i})
                    own_mapper(cls_)(cls_, child_t)
                    props["_" + k] = relationship(cls_)

                    # generate association proxy
                    setattr(cls, k, association_proxy("_" + k, child_right_col_name))


                else: # one to many complex type
                    _gen_col = _get_col_o2m(cls, p.right)
                    col_info = _gen_col.next() # gets the column name
                    p.right, col_type = col_info[0] # FIXME: Add support for multi-column primary keys.

                    assert p.left is None, \
                        "'left' is ignored in one-to-many relationships " \
                        "with complex types (because they already have a " \
                        "table). You probably meant to use 'right'."

                    child_t = child.__table__

                    if p.right in child_t.c:
                        # FIXME: This branch MUST be tested.
                        assert col_type is child_t.c[p.right].type.__class__

                        # if the column is there, the decision about whether
                        # it should be in child's mapper should also have been
                        # made.
                        #
                        # so, not adding the child column to the child mapper
                        # here.
                        col = child_t.c[p.right]

                    else:
                        col = _gen_col.next()

                        _sp_attrs_to_sqla_constraints(cls, child_cust, col=col)

                        child_t.append_column(col)
                        child.__mapper__.add_property(col.name, col)

                    props[k] = relationship(child, foreign_keys=[col],
                                                              backref=p.backref)

            elif p is not None and issubclass(v, ComplexModelBase):
                # v has the Attribute values we need whereas real_v is what the
                # user instantiates (thus what sqlalchemy needs)
                if v.__orig__ is None: # vanilla class
                    real_v = v
                else: # customized class
                    real_v = v.__orig__

                if isinstance(p, c_table):
                    assert not getattr(p, 'multi', False), (
                                        'Storing a single element-type using a '
                                        'relation table is pointless.')

                    assert p.right is None, "'right' is ignored in a one-to-one " \
                                            "relationship"

                    col = _get_col_o2o(cls, k, v, p.left)
                    rel = relationship(real_v, uselist=False,
                                          foreign_keys=[col], backref=p.backref)

                    p.left = col.key
                    props[k] = rel

                elif isinstance(p, c_xml):
                    if k in table.c:
                        col = table.c[k]
                    else:
                        col = Column(k, PGObjectXml(v, p.root_tag, p.no_ns),
                                                        *col_args, **col_kwargs)

                elif isinstance(p, c_json):
                    if k in table.c:
                        col = table.c[k]
                    else:
                        col = Column(k, PGObjectJson(v,
                                            ignore_wrappers=p.ignore_wrappers,
                                            complex_as=p.complex_as
                                        ),
                                        *col_args, **col_kwargs
                            )

                elif isinstance(p, c_msgpack):
                    raise NotImplementedError()

                else:
                    raise ValueError(p)

                props[col.name] = col
                if not k in table.c:
                    table.append_column(col)

            else:
                logger.debug("Skipping %s.%s.%s: %r, store_as: %r" % (
                                                cls.get_namespace(),
                                                cls.get_type_name(), k, v, p))

        else:
            unique = v.Attributes.unique
            index = v.Attributes.index
            if unique and not index:
                index = True

            try:
                index_name, index_method = v.Attributes.index

            except (TypeError, ValueError):
                index_name = "%s_%s%s" % (table_name, k, '_unique' if unique else '')
                index_method = v.Attributes.index

            if k in table.c:
                col = table.c[k]

            else:
                col = Column(k, t, *col_args, **col_kwargs)
                table.append_column(col)

                if index in (False, None):
                    pass

                else:
                    if index == True:
                        index_args = (index_name, col), dict(unique=unique)
                    else:
                        index_args = (index_name, col), dict(unique=unique,
                                                  postgresql_using=index_method)

                    if isinstance(table, _FakeTable):
                        table.indexes.append(index_args)
                    else:
                        Index(*index_args[0], **index_args[1])

            if not v.Attributes.exc_mapper:
                props[k] = col

    if isinstance(table, _FakeTable):
        _table = table
        table_args, table_kwargs = sanitize_args(cls.Attributes.sqla_table_args)
        table = Table(table_name, metadata,
                           *(tuple(table.columns) + table_args), **table_kwargs)

        for index_args, index_kwargs in _table.indexes:
            Index(*index_args, **index_kwargs)

        del _table

    # Map the table to the object
    mapper_args, mapper_kwargs = sanitize_args(cls.Attributes.sqla_mapper_args)

    _props = mapper_kwargs.get('properties', None)
    if _props is None:
        mapper_kwargs['properties'] = props
    else:
        props.update(_props)
        mapper_kwargs['properties'] = props

    _inc = mapper_kwargs.get('include_properties', None)
    if _inc is None:
        mapper_kwargs['include_properties'] = inc + props.keys()

    po = mapper_kwargs.get('polymorphic_on', None)
    if po is not None:
        if not isinstance(po, Column):
            mapper_kwargs['polymorphic_on'] = table.c[po]
        else:
            del mapper_kwargs['polymorphic_on']

    if base_mapper is not None:
        mapper_kwargs['inherits'] = base_mapper

    if inheritance is not _SINGLE:
        mapper_args = (table,) + mapper_args

    cls_mapper = mapper(cls, *mapper_args, **mapper_kwargs)

    def my_load_listener(target, context):
        d = target.__dict__

        for k, v in cls.get_flat_type_info(cls).items():
            if not k in d:
                if isclass(v) and issubclass(v, ComplexModelBase):
                    pass
                else:
                    d[k] = None


    event.listen(cls, 'load', my_load_listener)

    cls.__tablename__ = cls.Attributes.table_name
    cls.Attributes.sqla_mapper = cls.__mapper__ = cls_mapper
    cls.Attributes.sqla_table = cls.__table__ = table

    return table
Example #33
def gen_sqla_info(cls, cls_bases=()):
    """Return SQLAlchemy table object corresponding to the passed Spyne object.
    Also maps given class to the returned table.
    """

    metadata = cls.Attributes.sqla_metadata
    table_name = cls.Attributes.table_name

    inc = []  # include_properties

    # check inheritance
    inheritance = None
    base_class = getattr(cls, '__extends__', None)
    if base_class is None:
        for b in cls_bases:
            if getattr(b, '_type_info', None) is not None and b.__mixin__:
                base_class = b

    if base_class is not None:
        base_table_name = base_class.Attributes.table_name
        if base_table_name is not None:
            if base_table_name == table_name:
                inheritance = _SINGLE
            else:
                inheritance = _JOINED
                raise NotImplementedError(
                    "Joined table inheritance is not yet "
                    "implemented.")
            inc_prop = base_class.Attributes.sqla_mapper.include_properties
            if inc_prop is not None:
                inc.extend(inc_prop)

            exc_prop = base_class.Attributes.sqla_mapper.exclude_properties
            if exc_prop is not None:
                inc = [_p for _p in inc if not _p in exc_prop]

    # check whether the object already has a table
    table = None
    if table_name in metadata.tables:
        table = metadata.tables[table_name]
    else:
        # We need FakeTable because table_args can contain all sorts of stuff
        # that can require a fully-constructed table, and we don't have that
        # information here yet.
        table = _FakeTable()

    # check whether the base classes are already mapped
    base_mapper = None
    if base_class is not None:
        base_mapper = base_class.Attributes.sqla_mapper

    if base_mapper is None:
        for b in cls_bases:
            bm = _mapper_registry.get(b, None)
            if bm is not None:
                assert base_mapper is None, "There can be only one base mapper."
                base_mapper = bm
                inheritance = _SINGLE

    props = {}

    # For each Spyne field
    for k, v in cls._type_info.items():
        if v.Attributes.exc_table:
            continue

        col_args, col_kwargs = sanitize_args(v.Attributes.sqla_column_args)
        _sp_attrs_to_sqla_constraints(cls, v, col_kwargs)

        t = get_sqlalchemy_type(v)

        if t is None:
            p = getattr(v.Attributes, 'store_as', None)
            if p is not None and issubclass(v, Array) and isinstance(
                    p, c_table):
                child_cust, = v._type_info.values()
                if child_cust.__orig__ is not None:
                    child = child_cust.__orig__
                else:
                    child = child_cust

                if p.multi != False:  # many to many
                    col_own, col_child = _get_cols_m2m(cls, k, v, p.left,
                                                       p.right)

                    p.left = col_own.key
                    p.right = col_child.key

                    if p.multi == True:
                        rel_table_name = '_'.join(
                            [cls.Attributes.table_name, k])
                    else:
                        rel_table_name = p.multi

                    # FIXME: Handle the case where the table already exists.
                    rel_t = Table(rel_table_name, metadata,
                                  *(col_own, col_child))

                    props[k] = relationship(child,
                                            secondary=rel_t,
                                            backref=p.backref)

                elif issubclass(child, SimpleModel):  # one to many simple type
                    # get left (fk) column info
                    _gen_col = _get_col_o2m(cls, p.left)
                    col_info = _gen_col.next()  # gets the column name
                    p.left, child_left_col_type = col_info[
                        0]  # FIXME: Add support for multi-column primary keys.
                    child_left_col_name = p.left

                    # get right(data) column info
                    child_right_col_type = get_sqlalchemy_type(child_cust)
                    child_right_col_name = p.right  # this is the data column
                    if child_right_col_name is None:
                        child_right_col_name = k

                    # get table name
                    child_table_name = child_cust.Attributes.table_name
                    if child_table_name is None:
                        child_table_name = '_'.join([table_name, k])

                    if child_table_name in metadata.tables:
                        # table exists, get relevant info
                        child_t = metadata.tables[child_table_name]
                        assert child_right_col_type is \
                               child_t.c[child_right_col_name].type.__class__
                        assert child_left_col_type is \
                               child_t.c[child_left_col_name].type.__class__

                        child_right_col = child_t.c[child_right_col_name]
                        child_left_col = child_t.c[child_left_col_name]

                    else:
                        # table does not exist, generate table
                        child_right_col = Column(child_right_col_name,
                                                 child_right_col_type)
                        _sp_attrs_to_sqla_constraints(cls,
                                                      child_cust,
                                                      col=child_right_col)

                        child_left_col = _gen_col.next()
                        _sp_attrs_to_sqla_constraints(cls,
                                                      child_cust,
                                                      col=child_left_col)

                        child_t = Table(
                            child_table_name, metadata,
                            Column('id', sqlalchemy.Integer, primary_key=True),
                            child_left_col, child_right_col)

                    # generate temporary class for association proxy
                    cls_name = ''.join(x.capitalize() or '_'
                                       for x in child_table_name.split('_'))

                    # generates camelcase class name.

                    def _i(self, *args):
                        setattr(self, child_right_col_name, args[0])

                    cls_ = type("_" + cls_name, (object, ), {'__init__': _i})
                    own_mapper(cls_)(cls_, child_t)
                    props["_" + k] = relationship(cls_)

                    # generate association proxy
                    setattr(cls, k,
                            association_proxy("_" + k, child_right_col_name))

                else:  # one to many complex type
                    _gen_col = _get_col_o2m(cls, p.right)
                    col_info = next(_gen_col)  # gets the column name
                    # FIXME: Add support for multi-column primary keys.
                    p.right, col_type = col_info[0]

                    assert p.left is None, \
                        "'left' is ignored in one-to-many relationships " \
                        "with complex types (because they already have a " \
                        "table). You probably meant to use 'right'."

                    child_t = child.__table__

                    if p.right in child_t.c:
                        # FIXME: This branch MUST be tested.
                        assert col_type is child_t.c[p.right].type.__class__

                        # if the column is there, the decision about whether
                        # it should be in child's mapper should also have been
                        # made.
                        #
                        # so, not adding the child column to the child mapper
                        # here.
                        col = child_t.c[p.right]

                    else:
                        col = next(_gen_col)

                        _sp_attrs_to_sqla_constraints(cls, child_cust, col=col)

                        child_t.append_column(col)
                        child.__mapper__.add_property(col.name, col)

                    props[k] = relationship(child,
                                            foreign_keys=[col],
                                            backref=p.backref)

            elif p is not None and issubclass(v, ComplexModelBase):
                # v has the Attribute values we need whereas real_v is what the
                # user instantiates (thus what sqlalchemy needs)
                if v.__orig__ is None:  # vanilla class
                    real_v = v
                else:  # customized class
                    real_v = v.__orig__

                if isinstance(p, c_table):
                    assert not getattr(p, 'multi', False), (
                        'Storing a single element-type using a '
                        'relation table is pointless.')

                    assert p.right is None, "'right' is ignored in a one-to-one " \
                                            "relationship"

                    col = _get_col_o2o(cls, k, v, p.left)
                    rel = relationship(real_v,
                                       uselist=False,
                                       foreign_keys=[col],
                                       backref=p.backref)

                    p.left = col.key
                    props[k] = rel
                    _gen_index_info(table, table_name, col, k, v)

                elif isinstance(p, c_xml):
                    if k in table.c:
                        col = table.c[k]
                    else:
                        col = Column(k, PGObjectXml(v, p.root_tag, p.no_ns),
                                     *col_args, **col_kwargs)

                elif isinstance(p, c_json):
                    if k in table.c:
                        col = table.c[k]
                    else:
                        col = Column(
                            k,
                            PGObjectJson(v,
                                         ignore_wrappers=p.ignore_wrappers,
                                         complex_as=p.complex_as), *col_args,
                            **col_kwargs)

                elif isinstance(p, c_msgpack):
                    raise NotImplementedError()

                else:
                    raise ValueError(p)

                props[col.name] = col
                if k not in table.c:
                    table.append_column(col)

            else:
                logger.debug(
                    "Skipping %s.%s.%s: %r, store_as: %r" %
                    (cls.get_namespace(), cls.get_type_name(), k, v, p))

        else:
            if k in table.c:
                col = table.c[k]

            else:
                col = Column(k, t, *col_args, **col_kwargs)
                table.append_column(col)
                _gen_index_info(table, table_name, col, k, v)

            if not v.Attributes.exc_mapper:
                props[k] = col

    if isinstance(table, _FakeTable):
        _table = table
        table_args, table_kwargs = sanitize_args(
            cls.Attributes.sqla_table_args)
        table = Table(table_name, metadata,
                      *(tuple(table.columns) + table_args), **table_kwargs)

        for index_args, index_kwargs in _table.indexes:
            Index(*index_args, **index_kwargs)

        del _table

    # Map the table to the object
    mapper_args, mapper_kwargs = sanitize_args(cls.Attributes.sqla_mapper_args)

    _props = mapper_kwargs.get('properties', None)
    if _props is None:
        mapper_kwargs['properties'] = props
    else:
        props.update(_props)
        mapper_kwargs['properties'] = props

    _inc = mapper_kwargs.get('include_properties', None)
    if _inc is None:
        mapper_kwargs['include_properties'] = inc + list(props.keys())

    po = mapper_kwargs.get('polymorphic_on', None)
    if po is not None:
        if not isinstance(po, Column):
            mapper_kwargs['polymorphic_on'] = table.c[po]
        else:
            del mapper_kwargs['polymorphic_on']

    if base_mapper is not None:
        mapper_kwargs['inherits'] = base_mapper

    if inheritance is not _SINGLE:
        mapper_args = (table, ) + mapper_args

    cls_mapper = mapper(cls, *mapper_args, **mapper_kwargs)

    def my_load_listener(target, context):
        d = target.__dict__

        for k, v in cls.get_flat_type_info(cls).items():
            if k not in d:
                if isclass(v) and issubclass(v, ComplexModelBase):
                    pass
                else:
                    d[k] = None

    event.listen(cls, 'load', my_load_listener)

    cls.__tablename__ = cls.Attributes.table_name
    cls.Attributes.sqla_mapper = cls.__mapper__ = cls_mapper
    cls.Attributes.sqla_table = cls.__table__ = table

    return table
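
# A minimal, self-contained sketch of the "one to many simple type" pattern
# above: scalar values live in a child table, a throwaway mapped class wraps
# each row, and an association proxy exposes the scalars directly. All names
# below are illustrative assumptions, not taken from the code above.
from sqlalchemy import Column, ForeignKey, Integer, Unicode, create_engine
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import Session, declarative_base, relationship

Base = declarative_base()

class _UserEmail(Base):
    __tablename__ = 'user_email'
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('user.id'))
    value = Column(Unicode(128))

    def __init__(self, value):
        self.value = value

class User(Base):
    __tablename__ = 'user'
    id = Column(Integer, primary_key=True)
    _emails = relationship(_UserEmail)
    # Reads and writes on ``User.emails`` go through ``_UserEmail.value``.
    emails = association_proxy('_emails', 'value')

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
with Session(engine) as session:
    user = User(emails=['a@example.com', 'b@example.com'])
    session.add(user)
    session.commit()
    assert sorted(user.emails) == ['a@example.com', 'b@example.com']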
Beispiel #34
0
def _copy_table(table):
    ret_table = Table(table.name, MetaData())
    for c in table.columns:
        ret_table.append_column(_copy_column(c))

    return ret_table
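
# Usage sketch for ``_copy_table`` (setup is illustrative): the copy is bound
# to a fresh MetaData, so it can be created on a different engine than the
# one the original table came from. ``_copy_column`` is defined elsewhere;
# constructing a new Column with the same name and type would be one way
# to implement it.
from sqlalchemy import Column, Integer, MetaData, Table, create_engine

source = Table('event', MetaData(), Column('id', Integer, primary_key=True))
clone = _copy_table(source)
assert clone.metadata is not source.metadata

target_engine = create_engine('sqlite://')
clone.metadata.create_all(target_engine)  # recreate the table elsewhere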
Beispiel #35
0
class SQLTable(Component):
    
    _selects = 0
    _inserts = 0
    _finalized = False
    
    def __init__(self):

        super(SQLTable, self).__init__()

        self._pk = False

        self.name = None
        self.connection = None
        self.columns = []
        
        self.create = True
        
        self.sa_table = None
        self.sa_metadata = None
        
        self._selects = 0
        self._inserts = 0
        self._unicode_errors = 0
    
    def _get_sa_type(self, column):
        
        if (column["type"] == "Integer"):
            return Integer
        elif (column["type"] == "String"):
            if (not "length" in column): column["length"] = 128
            return Unicode(length = column["length"])
        elif (column["type"] == "Float"):
            return Float    
        elif (column["type"] == "Boolean"):
            return Boolean
        elif (column["type"] == "AutoIncrement"):
            return Integer
        else:
            raise Exception("Invalid data type: %s" % column["type"])
    
    def finalize(self, ctx):
        
        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  inserts/selects: %d/%d " % 
                            (SQLTable._inserts, SQLTable._selects))
        
        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s inserts/selects: %6d/%-6d " % 
                            (self.name, self._inserts, self._selects))
        if (self._unicode_errors > 0):
            logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % 
                        (self.name, self._unicode_errors))
        
        ctx.comp.finalize(self.connection)     
        
        super(SQLTable, self).finalize(ctx)
    
    def initialize(self, ctx):
        
        super(SQLTable, self).initialize(ctx)
        
        ctx.comp.initialize(self.connection) 
        
        logger.debug("Loading table %s on %s" % (self.name, self))
        
        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        # Drop?

        columns_ex = []
        for column in self.columns:
            
            # Check for duplicate names
            if (column["name"] in columns_ex):
                raise Exception("Duplicate column name %s in %s" % (column["name"], self))
            columns_ex.append(column["name"])
                
            # Configure column            
            column["pk"] = False if (not "pk" in column) else parsebool(column["pk"])
            if (not "type" in column): column["type"] = "String"
            #if (not "value" in column): column["value"] = None
            logger.debug("Adding column %s" % column)
            self.sa_table.append_column( Column(column["name"], 
                                                self._get_sa_type(column), 
                                                primary_key = column["pk"], 
                                                autoincrement = (True if column["type"] == "AutoIncrement" else False) ))
        
        # Check schema
        
        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name) 
            self.sa_table.create(self.connection.connection())
            
        # Extend?
        
        # Delete columns?
            
    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """
        
        if (self._pk == False):
            pk_cols = []
            for col in self.columns:
                if ("pk" in col):
                    if parsebool(col["pk"]):
                        pk_cols.append(col)
                        
            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None
                
        return self._pk
            
    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)
        
        return and_(*clauses)            
            
    def _rowtodict(self, row):
        
        d = {}
        for column in self.columns:
            d[column["name"]] = getattr(row, column["name"])
    
        return d
            
    def _find(self, ctx, attribs):
        
        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1
        
        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)
             
        
    def lookup(self, ctx, attribs):
        
        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))
        
        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table with no criteria (empty attribute set)")
        
        rows = self._find(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]   
        else:
            row = None
        
        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row
    
    def upsert(self, ctx, data, keys = []):
        
        # TODO: Check for AutoIncrement in keys, shall not be used
        
        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        
            row = self.lookup(ctx, qfilter)
            if (row): return row
        
        row_with_id = self.insert(ctx, data)
                        
        return row_with_id
        
    def _prepare_row(self, ctx, data):
        
        row = {}
        
        for column in self.columns:
            if (column["type"] != "AutoIncrement"):
                try:
                    row[column["name"]] = data[column["name"]]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))
                
                # Checks
                if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))):
                    self._unicode_errors = self._unicode_errors + 1 
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]]))
                
        return row
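
# A sketch of the lookup-then-insert flow behind ``upsert`` above, reduced to
# plain SQLAlchemy (schema and names are illustrative assumptions):
from sqlalchemy import Column, Integer, MetaData, Table, Unicode, create_engine, select

engine = create_engine('sqlite://')
metadata = MetaData()
users = Table('users', metadata,
              Column('id', Integer, primary_key=True),
              Column('name', Unicode(128)))
metadata.create_all(engine)

def upsert(conn, data, keys):
    # Filter on the key columns and return the existing row if one matches.
    clause = [users.c[k] == data[k] for k in keys]
    row = conn.execute(select(users).where(*clause)).first()
    if row is not None:
        return dict(row._mapping)
    conn.execute(users.insert().values(**data))
    return data

with engine.begin() as conn:
    upsert(conn, {'id': 1, 'name': 'alice'}, keys=['id'])
    upsert(conn, {'id': 1, 'name': 'alice'}, keys=['id'])  # found, not re-inserted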
Beispiel #36
0
class Tabular(object):
    def __init__(self, schema):
        self.schema = schema
        self.bind = db.engine
        self.meta = MetaData()
        self.meta.bind = self.bind
        self._table = None

    @property
    def table(self):
        """ Generate an appropriate table representation to mirror the
        fields known for this table. """
        if self._table is None:
            self._table = Table(self.schema.table_name, self.meta)
            id_col = Column("_id", Unicode(42), primary_key=True)
            self._table.append_column(id_col)
            for column in self.schema.columns:
                column = Column(column.name, Unicode, nullable=True)
                self._table.append_column(column)
        return self._table

    @property
    def exists(self):
        return db.engine.has_table(self.table.name)

    def load_iter(self, iterable, chunk_size=5000):
        """ Bulk load all the data in an artifact to a matching database
        table. """
        chunk = []

        conn = self.bind.connect()
        tx = conn.begin()
        try:
            for i, record in enumerate(iterable):
                record["_id"] = i
                chunk.append(record)
                if len(chunk) >= chunk_size:
                    stmt = self.table.insert()
                    conn.execute(stmt, chunk)
                    chunk = []

            if len(chunk):
                stmt = self.table.insert()
                conn.execute(stmt, chunk)
            tx.commit()
        except:
            tx.rollback()
            raise

    def create(self):
        """ Create the fact table if it does not exist. """
        if not self.exists:
            self.table.create(self.bind)

    def drop(self):
        """ Drop the fact table if it does exist. """
        if self.exists:
            self.table.drop()
        self._table = None

    def to_dict(self):
        return self.schema.to_dict()

    def __len__(self):
        if not hasattr(self, "_count"):
            q = select(columns=func.count(self.table.c._id), from_obj=self.table)
            rp = db.engine.execute(q)
            self._count = rp.scalar()
        return self._count

    def __iter__(self):
        q = select(columns=self.table.c, from_obj=self.table)
        rp = db.engine.execute(q)
        while True:
            rows = rp.fetchmany(2000)
            if not rows:
                return
            for row in rows:
                yield OrderedDict(row.items())

    def __repr__(self):
        return "<Tabular(%r)>" % self.document
Beispiel #37
0
class FactTable(object):
    """ The ``FactTable`` serves as a controller object for
    a given ``Model``, handling the creation, filling and migration
    of the table schema associated with the dataset. """

    def __init__(self, dataset):
        self.dataset = dataset
        self.bind = db.engine
        self.table_name = '%s__facts' % dataset.name
        self.meta = MetaData()
        self.meta.bind = self.bind
        self._table = None

    @property
    def table(self):
        """ Generate an appropriate table representation to mirror the
        fields known for this table. """
        if self._table is None:
            self._table = Table(self.table_name, self.meta)
            id_col = Column('_id', Unicode(42), primary_key=True)
            self._table.append_column(id_col)
            json_col = Column('_json', Unicode())
            self._table.append_column(json_col)
            self._fields_columns(self._table)
        return self._table

    @property
    def alias(self):
        """ An alias used for queries. """
        if not hasattr(self, '_alias'):
            self._alias = self.table.alias('entry')
        return self._alias

    @property
    def mapping(self):
        if not hasattr(self, '_mapping'):
            self._mapping = {}
            for attribute in self.dataset.model.attributes:
                if attribute.column in self.alias.columns:
                    col = self.alias.c[attribute.column]
                    self._mapping[attribute.path] = col
        return self._mapping

    @property
    def exists(self):
        return db.engine.has_table(self.table.name)

    def _fields_columns(self, table):
        """ Transform the (auto-detected) fields into a set of column
        specifications. """
        for field in self.dataset.fields:
            data_type = TYPES.get(field.get('type'), Unicode)
            col = Column(field.get('name'), data_type, nullable=True)
            table.append_column(col)

    def load_iter(self, iterable, chunk_size=1000):
        """ Bulk load all the data in an artifact to a matching database
        table. """
        chunk = []

        conn = self.bind.connect()
        tx = conn.begin()
        try:
            for i, record in enumerate(iterable):
                record['_id'] = i
                record['_json'] = json.dumps(record, default=json_default)
                chunk.append(record)
                if len(chunk) >= chunk_size:
                    stmt = self.table.insert()
                    conn.execute(stmt, chunk)
                    chunk = []

            if len(chunk):
                stmt = self.table.insert()
                conn.execute(stmt, chunk)
            tx.commit()
        except:
            tx.rollback()
            raise

    def create(self):
        """ Create the fact table if it does not exist. """
        if not self.exists:
            self.table.create(self.bind)

    def drop(self):
        """ Drop the fact table if it does exist. """
        if self.exists:
            self.table.drop()
        self._table = None

    def __repr__(self):
        return "<FactTable(%r)>" % (self.dataset)
Beispiel #38
0
class Table(object):
    """Represents a table in a database and exposes common operations."""

    PRIMARY_DEFAULT = "id"

    def __init__(
        self,
        database,
        table_name,
        primary_id=None,
        primary_type=None,
        primary_increment=None,
        auto_create=False,
    ):
        """Initialise the table from database schema."""
        self.db = database
        self.name = normalize_table_name(table_name)
        self._table = None
        self._columns = None
        self._indexes = []
        self._primary_id = (primary_id if primary_id is not None else
                            self.PRIMARY_DEFAULT)
        self._primary_type = primary_type if primary_type is not None else Types.integer
        if primary_increment is None:
            primary_increment = self._primary_type in (Types.integer,
                                                       Types.bigint)
        self._primary_increment = primary_increment
        self._auto_create = auto_create

    @property
    def exists(self):
        """Check to see if the table currently exists in the database."""
        if self._table is not None:
            return True
        return self.name in self.db

    @property
    def table(self):
        """Get a reference to the table, which may be reflected or created."""
        if self._table is None:
            self._sync_table(())
        return self._table

    @property
    def _column_keys(self):
        """Get a dictionary of all columns and their case mapping."""
        if not self.exists:
            return {}
        with self.db.lock:
            if self._columns is None:
                # Initialise the table if it doesn't exist
                table = self.table
                self._columns = {}
                for column in table.columns:
                    name = normalize_column_name(column.name)
                    key = normalize_column_key(name)
                    if key in self._columns:
                        log.warning("Duplicate column: %s", name)
                    self._columns[key] = name
            return self._columns

    @property
    def columns(self):
        """Get a listing of all columns that exist in the table."""
        return list(self._column_keys.values())

    def has_column(self, column):
        """Check if a column with the given name exists on this table."""
        key = normalize_column_key(normalize_column_name(column))
        return key in self._column_keys

    def _get_column_name(self, name):
        """Find the best column name with case-insensitive matching."""
        name = normalize_column_name(name)
        key = normalize_column_key(name)
        return self._column_keys.get(key, name)

    def insert(self, row, ensure=None, types=None):
        """Add a ``row`` dict by inserting it into the table.

        If ``ensure`` is set and any of the keys of the row are not
        table columns, they will be created automatically.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(title='I am a banana!')
            table.insert(data)

        Returns the inserted row's primary key.
        """
        row = self._sync_columns(row, ensure, types=types)
        res = self.db.executable.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]
        return True

    def insert_ignore(self, row, keys, ensure=None, types=None):
        """Add a ``row`` dict into the table if the row does not exist.

        If rows with matching ``keys`` exist no change is made.

        Setting ``ensure`` results in automatically creating missing columns,
        i.e. keys of the row that are not yet table columns.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(id=10, title='I am a banana!')
            table.insert_ignore(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        args, _ = self._keys_to_args(row, keys)
        if self.count(**args) == 0:
            return self.insert(row, ensure=False)
        return False

    def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
        """Add many rows at a time.

        This is significantly faster than adding them one by one. By default
        the rows are processed in chunks of 1000 per commit, unless you specify
        a different ``chunk_size``.

        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::

            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """
        # Sync table before inputting rows.
        sync_row = {}
        for row in rows:
            # Only get non-existing columns.
            sync_keys = list(sync_row.keys())
            for key in [k for k in row.keys() if k not in sync_keys]:
                # Get a sample of the new column(s) from the row.
                sync_row[key] = row[key]
        self._sync_columns(sync_row, ensure, types=types)

        # Get columns name list to be used for padding later.
        columns = sync_row.keys()

        chunk = []
        for index, row in enumerate(rows):
            chunk.append(row)

            # Insert when chunk_size is fulfilled or this is the last row
            if len(chunk) == chunk_size or index == len(rows) - 1:
                chunk = pad_chunk_columns(chunk, columns)
                self.table.insert().execute(chunk)
                chunk = []

    def update(self, row, keys, ensure=None, types=None, return_count=False):
        """Update a row in the table.

        The update is managed via the set of column names stated in ``keys``:
        they will be used as filters for the data to be updated, using the
        values in ``row``.
        ::

            # update all entries with id matching 10, setting their title
            # columns
            data = dict(id=10, title='I am a banana!')
            table.update(data, ['id'])

        If keys in ``row`` update columns not present in the table, they will
        be created based on the settings of ``ensure`` and ``types``, matching
        the behavior of :py:meth:`insert() <dataset.Table.insert>`.
        """
        row = self._sync_columns(row, ensure, types=types)
        args, row = self._keys_to_args(row, keys)
        clause = self._args_to_clause(args)
        if not len(row):
            return self.count(clause)
        stmt = self.table.update(whereclause=clause, values=row)
        rp = self.db.executable.execute(stmt)
        if rp.supports_sane_rowcount():
            return rp.rowcount
        if return_count:
            return self.count(clause)

    def update_many(self,
                    rows,
                    keys,
                    chunk_size=1000,
                    ensure=None,
                    types=None):
        """Update many rows in the table at a time.

        This is significantly faster than updating them one by one. By default
        the rows are processed in chunks of 1000 per commit, unless you specify
        a different ``chunk_size``.

        See :py:meth:`update() <dataset.Table.update>` for details on
        the other parameters.
        """
        keys = ensure_list(keys)

        chunk = []
        columns = []
        for index, row in enumerate(rows):
            chunk.append(row)
            for col in row.keys():
                if col not in columns:
                    columns.append(col)

            # bindparam requires names to not conflict (cannot be "id" for id)
            for key in keys:
                row["_%s" % key] = row[key]

            # Update when chunk_size is fulfilled or this is the last row
            if len(chunk) == chunk_size or index == len(rows) - 1:
                cl = [self.table.c[k] == bindparam("_%s" % k) for k in keys]
                stmt = self.table.update(
                    whereclause=and_(*cl),
                    values={
                        col: bindparam(col, required=False)
                        for col in columns
                    },
                )
                self.db.executable.execute(stmt, chunk)
                chunk = []

    def upsert(self, row, keys, ensure=None, types=None):
        """An UPSERT is a smart combination of insert and update.

        If rows with matching ``keys`` exist they will be updated, otherwise a
        new row is inserted in the table.
        ::

            data = dict(id=10, title='I am a banana!')
            table.upsert(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        row_count = self.update(row, keys, ensure=False, return_count=True)
        if row_count == 0:
            return self.insert(row, ensure=False)
        return True

    def upsert_many(self,
                    rows,
                    keys,
                    chunk_size=1000,
                    ensure=None,
                    types=None):
        """
        Sorts multiple input rows into upserts and inserts: rows with matching
        ``keys`` are updated, and the remaining rows are inserted.

        See :py:meth:`upsert() <dataset.Table.upsert>` and
        :py:meth:`insert_many() <dataset.Table.insert_many>`.
        """
        # Removing a bulk implementation in 5e09aba401. Doing this one by one
        # is incredibly slow, but doesn't run into issues with column creation.
        for row in rows:
            self.upsert(row, keys, ensure=ensure, types=types)

    def delete(self, *clauses, **filters):
        """Delete rows from the table.

        Keyword arguments can be used to add column-based filters. The filter
        criterion will always be equality:
        ::

            table.delete(place='Berlin')

        If no arguments are given, all records are deleted.
        """
        if not self.exists:
            return False
        clause = self._args_to_clause(filters, clauses=clauses)
        stmt = self.table.delete(whereclause=clause)
        rp = self.db.executable.execute(stmt)
        return rp.rowcount > 0

    def _reflect_table(self):
        """Load the tables definition from the database."""
        with self.db.lock:
            self._columns = None
            try:
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema,
                                        autoload=True)
            except NoSuchTableError:
                self._table = None

    def _threading_warn(self):
        if self.db.in_transaction and threading.active_count() > 1:
            warnings.warn(
                "Changing the database schema inside a transaction "
                "in a multi-threaded environment is likely to lead "
                "to race conditions and synchronization issues.",
                RuntimeWarning,
            )

    def _sync_table(self, columns):
        """Lazy load, create or adapt the table structure in the database."""
        if self._table is None:
            # Load an existing table from the database.
            self._reflect_table()
        if self._table is None:
            # Create the table with an initial set of columns.
            if not self._auto_create:
                raise DatasetException("Table does not exist: %s" % self.name)
            # Keep the lock scope small because this is run very often.
            with self.db.lock:
                self._threading_warn()
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema)
                if self._primary_id is not False:
                    # This can go wrong on DBMS like MySQL and SQLite where
                    # tables cannot have no columns.
                    column = Column(
                        self._primary_id,
                        self._primary_type,
                        primary_key=True,
                        autoincrement=self._primary_increment,
                    )
                    self._table.append_column(column)
                for column in columns:
                    if column.name != self._primary_id:
                        self._table.append_column(column)
                self._table.create(self.db.executable, checkfirst=True)
                self._columns = None
        elif len(columns):
            with self.db.lock:
                self._reflect_table()
                self._threading_warn()
                for column in columns:
                    if not self.has_column(column.name):
                        self.db.op.add_column(self.name, column,
                                              self.db.schema)
                self._reflect_table()

    def _sync_columns(self, row, ensure, types=None):
        """Create missing columns (or the table) prior to writes.

        If automatic schema generation is disabled (``ensure`` is ``False``),
        this will remove any keys from the ``row`` for which there is no
        matching column.
        """
        ensure = self._check_ensure(ensure)
        types = types or {}
        types = {self._get_column_name(k): v for (k, v) in types.items()}
        out = {}
        sync_columns = {}
        for name, value in row.items():
            name = self._get_column_name(name)
            if self.has_column(name):
                out[name] = value
            elif ensure:
                _type = types.get(name)
                if _type is None:
                    _type = self.db.types.guess(value)
                sync_columns[name] = Column(name, _type)
                out[name] = value
        self._sync_table(sync_columns.values())
        return out

    def _check_ensure(self, ensure):
        if ensure is None:
            return self.db.ensure_schema
        return ensure

    def _generate_clause(self, column, op, value):
        if op in ("like", ):
            return self.table.c[column].like(value)
        if op in ("ilike", ):
            return self.table.c[column].ilike(value)
        if op in ("notlike", ):
            return self.table.c[column].notlike(value)
        if op in ("notilike", ):
            return self.table.c[column].notilike(value)
        if op in (">", "gt"):
            return self.table.c[column] > value
        if op in ("<", "lt"):
            return self.table.c[column] < value
        if op in (">=", "gte"):
            return self.table.c[column] >= value
        if op in ("<=", "lte"):
            return self.table.c[column] <= value
        if op in ("=", "==", "is"):
            return self.table.c[column] == value
        if op in ("!=", "<>", "not"):
            return self.table.c[column] != value
        if op in ("in", ):
            return self.table.c[column].in_(value)
        if op in ("notin", ):
            return self.table.c[column].notin_(value)
        if op in ("between", ".."):
            start, end = value
            return self.table.c[column].between(start, end)
        if op in ("startswith", ):
            return self.table.c[column].like("%" + value)
        if op in ("endswith", ):
            return self.table.c[column].like(value + "%")
        return false()

    def _args_to_clause(self, args, clauses=()):
        clauses = list(clauses)
        for column, value in args.items():
            column = self._get_column_name(column)
            if not self.has_column(column):
                clauses.append(false())
            elif isinstance(value, (list, tuple, set)):
                clauses.append(self._generate_clause(column, "in", value))
            elif isinstance(value, dict):
                for op, op_value in value.items():
                    clauses.append(self._generate_clause(column, op, op_value))
            else:
                clauses.append(self._generate_clause(column, "=", value))
        return and_(*clauses)

    def _args_to_order_by(self, order_by):
        orderings = []
        for ordering in ensure_list(order_by):
            if ordering is None:
                continue
            column = ordering.lstrip("-")
            column = self._get_column_name(column)
            if not self.has_column(column):
                continue
            if ordering.startswith("-"):
                orderings.append(self.table.c[column].desc())
            else:
                orderings.append(self.table.c[column].asc())
        return orderings

    def _keys_to_args(self, row, keys):
        keys = [self._get_column_name(k) for k in ensure_list(keys)]
        row = row.copy()
        args = {k: row.pop(k, None) for k in keys}
        return args, row

    def create_column(self, name, type, **kwargs):
        """Create a new column ``name`` of a specified type.
        ::

            table.create_column('created_at', db.types.datetime)

        `type` corresponds to an SQLAlchemy type as described by
        `dataset.db.Types`. Additional keyword arguments are passed
        to the constructor of `Column`, so that default values, and
        options like `nullable` and `unique` can be set.
        ::

            table.create_column('key', unique=True, nullable=False)
            table.create_column('food', default='banana')
        """
        name = self._get_column_name(name)
        if self.has_column(name):
            log.debug("Column exists: %s" % name)
            return
        self._sync_table((Column(name, type, **kwargs), ))

    def create_column_by_example(self, name, value):
        """
        Explicitly create a new column ``name`` with a type that is appropriate
        to store the given example ``value``.  The type is guessed in the same
        way as for the insert method with ``ensure=True``.
        ::

            table.create_column_by_example('length', 4.2)

        If a column of the same name already exists, no action is taken, even
        if it is not of the type we would have created.
        """
        type_ = self.db.types.guess(value)
        self.create_column(name, type_)

    def drop_column(self, name):
        """
        Drop the column ``name``.
        ::

            table.drop_column('created_at')

        """
        if self.db.engine.dialect.name == "sqlite":
            raise RuntimeError("SQLite does not support dropping columns.")
        name = self._get_column_name(name)
        with self.db.lock:
            if not self.exists or not self.has_column(name):
                log.debug("Column does not exist: %s", name)
                return

            self._threading_warn()
            self.db.op.drop_column(self.table.name, name, self.table.schema)
            self._reflect_table()

    def drop(self):
        """Drop the table from the database.

        Deletes both the schema and all the contents within it.
        """
        with self.db.lock:
            if self.exists:
                self._threading_warn()
                self.table.drop(self.db.executable, checkfirst=True)
                self._table = None
                self._columns = None
                self.db._tables.pop(self.name, None)

    def has_index(self, columns):
        """Check if an index exists to cover the given ``columns``."""
        if not self.exists:
            return False
        columns = set([self._get_column_name(c) for c in columns])
        if columns in self._indexes:
            return True
        for column in columns:
            if not self.has_column(column):
                return False
        indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
        for index in indexes:
            if columns == set(index.get("column_names", [])):
                self._indexes.append(columns)
                return True
        return False

    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [self._get_column_name(c) for c in ensure_list(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            for column in columns:
                if not self.has_column(column):
                    return

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]

                # MySQL crashes out if you try to index very long text fields,
                # apparently. This defines (a somewhat random) prefix that
                # will be captured by the index, after which I assume the engine
                # conducts a more linear scan:
                mysql_length = {}
                for col in columns:
                    if isinstance(col.type, MYSQL_LENGTH_TYPES):
                        mysql_length[col.name] = 10
                kw["mysql_length"] = mysql_length

                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)

    def find(self, *_clauses, **kwargs):
        """Perform a simple search on the table.

        Simply pass keyword arguments as ``filter``.
        ::

            results = table.find(country='France')
            results = table.find(country='France', year=1980)

        Using ``_limit``::

            # just return the first 10 rows
            results = table.find(country='France', _limit=10)

        You can sort the results by single or multiple columns. Append a minus
        sign to the column name for descending order::

            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (descending by year)
            results = table.find(order_by=['country', '-year'])

        You can also submit filters based on criteria other than equality,
        see :ref:`advanced_filters` for details.

        To run more complex queries with JOINs, or to perform GROUP BY-style
        aggregation, you can also use :py:meth:`db.query() <dataset.Database.query>`
        to run raw SQL queries instead.
        """
        if not self.exists:
            return iter([])

        _limit = kwargs.pop("_limit", None)
        _offset = kwargs.pop("_offset", 0)
        order_by = kwargs.pop("order_by", None)
        _streamed = kwargs.pop("_streamed", False)
        _step = kwargs.pop("_step", QUERY_STEP)
        if _step is False or _step == 0:
            _step = None

        order_by = self._args_to_order_by(order_by)
        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = self.table.select(whereclause=args,
                                  limit=_limit,
                                  offset=_offset)
        if len(order_by):
            query = query.order_by(*order_by)

        conn = self.db.executable
        if _streamed:
            conn = self.db.engine.connect()
            conn = conn.execution_options(stream_results=True)

        return ResultIter(conn.execute(query),
                          row_type=self.db.row_type,
                          step=_step)

    def find_one(self, *args, **kwargs):
        """Get a single result from the table.

        Works just like :py:meth:`find() <dataset.Table.find>` but returns one
        result, or ``None``.
        ::

            row = table.find_one(country='United States')
        """
        if not self.exists:
            return None

        kwargs["_limit"] = 1
        kwargs["_step"] = None
        resiter = self.find(*args, **kwargs)
        try:
            for row in resiter:
                return row
        finally:
            resiter.close()

    def count(self, *_clauses, **kwargs):
        """Return the count of results for the given filter set."""
        # NOTE: this does not have support for limit and offset since I can't
        # see how this is useful. Still, there might be compatibility issues
        # with people using these flags. Let's see how it goes.
        if not self.exists:
            return 0

        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = select([func.count()], whereclause=args)
        query = query.select_from(self.table)
        rp = self.db.executable.execute(query)
        return rp.fetchone()[0]

    def __len__(self):
        """Return the number of rows in the table."""
        return self.count()

    def distinct(self, *args, **_filter):
        """Return all the unique (distinct) values for the given ``columns``.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        if not self.exists:
            return iter([])

        columns = []
        clauses = []
        for column in args:
            if isinstance(column, ClauseElement):
                clauses.append(column)
            else:
                if not self.has_column(column):
                    raise DatasetException("No such column: %s" % column)
                columns.append(self.table.c[column])

        clause = self._args_to_clause(_filter, clauses=clauses)
        if not len(columns):
            return iter([])

        q = expression.select(
            columns,
            distinct=True,
            whereclause=clause,
            order_by=[c.asc() for c in columns],
        )
        return self.db.query(q)

    # Legacy methods for running find queries.
    all = find

    def __iter__(self):
        """Return all rows of the table as simple dictionaries.

        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`find() <dataset.Table.find>`.
        ::

            for row in table:
                print(row)
        """
        return self.find()

    def __repr__(self):
        """Get table representation."""
        return "<Table(%s)>" % self.table.name
Beispiel #39
0
class FactTable(object):
    """ The ``FactTable`` serves as a controller object for
    a given ``Model``, handling the creation, filling and migration
    of the table schema associated with the dataset. """

    def __init__(self, dataset):
        self.dataset = dataset
        self.bind = db.engine
        self.meta = MetaData()
        self.meta.bind = self.bind
        self._table = None

    @property
    def table(self):
        """ Generate an appropriate table representation to mirror the
        fields known for this table. """
        if self._table is None:
            name = '%s__facts' % self.dataset.name
            self._table = Table(name, self.meta)
            id_col = Column('_id', Unicode(42), primary_key=True)
            self._table.append_column(id_col)
            json_col = Column('_json', Unicode())
            self._table.append_column(json_col)
            self._fields_columns(self._table)
        return self._table

    @property
    def alias(self):
        """ An alias used for queries. """
        if not hasattr(self, '_alias'):
            self._alias = self.table.alias('entry')
        return self._alias

    @property
    def mapping(self):
        if not hasattr(self, '_mapping'):
            self._mapping = {}
            for attribute in self.dataset.model.attributes:
                if attribute.column in self.alias.columns:
                    col = self.alias.c[attribute.column]
                    self._mapping[attribute.path] = col
        return self._mapping

    @property
    def exists(self):
        return db.engine.has_table(self.table.name)

    def _fields_columns(self, table):
        """ Transform the (auto-detected) fields into a set of column
        specifications. """
        for field in self.dataset.fields:
            data_type = TYPES.get(field.get('type'), Unicode)
            col = Column(field.get('name'), data_type, nullable=True)
            table.append_column(col)

    def load_iter(self, iterable, chunk_size=1000):
        """ Bulk load all the data in an artifact to a matching database
        table. """
        chunk = []

        conn = self.bind.connect()
        tx = conn.begin()
        try:
            for i, record in enumerate(iterable):
                chunk.append(self._expand_record(i, record))
                if len(chunk) >= chunk_size:
                    stmt = self.table.insert()
                    conn.execute(stmt, chunk)
                    chunk = []

            if len(chunk):
                stmt = self.table.insert()
                conn.execute(stmt, chunk)
            tx.commit()
        except:
            tx.rollback()
            raise

    def _expand_record(self, i, record):
        """ Transform an incoming record into a form that matches the
        fields schema. """
        record['_id'] = i
        record['_json'] = json.dumps(record, default=json_default)
        return record

    def unpack_entry(self, row):
        """ Convert a database-returned row into a nested and mapped
        fact representation. """
        row = dict(row.items())
        result = {'id': row.get('_id')}
        for dimension in self.dataset.model.dimensions:
            value = {}
            for attr in dimension.attributes:
                value[attr.name] = row.get(attr.column)
            result[dimension.name] = value
        for measure in self.dataset.model.measures:
            result[measure.name] = row.get(measure.column)
        return result

    def create(self):
        """ Create the fact table if it does not exist. """
        if not self.exists:
            self.table.create(self.bind)

    def drop(self):
        """ Drop the fact table if it does exist. """
        if self.exists:
            self.table.drop()
        self._table = None

    def num_entries(self):
        """ Get the number of facts that are currently loaded. """
        if not self.exists:
            return 0
        rp = self.bind.execute(self.table.count())
        return rp.fetchone()[0]

    def _dimension_columns(self, dimension):
        """ Filter the generated columns for those related to a
        particular dimension. """
        prefix = dimension.name + '.'
        columns = []
        for path, col in self.mapping.items():
            if path.startswith(prefix):
                columns.append(col)
        return columns

    def num_members(self, dimension):
        """ Get the number of members for the given dimension. """
        if not self.exists:
            return 0
        q = select(self._dimension_columns(dimension), distinct=True)
        rp = self.bind.execute(q.alias('counted').count())
        return rp.fetchone()[0]

    def dimension_members(self, dimension, conditions="1=1", offset=0,
                          limit=None):
        selects = self._dimension_columns(dimension)
        order_by = [s.asc() for s in selects]
        for entry in self.entries(conditions=conditions, order_by=order_by,
                                  selects=selects, distinct=True,
                                  offset=offset, limit=limit):
            yield entry.get(dimension.name)

    def entries(self, conditions="1=1", order_by=None, limit=None,
                selects=[], distinct=False, offset=0, step=10000):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes. """
        if not self.exists:
            return

        if not selects:
            selects = [self.alias.c._id] + list(self.mapping.values())

            # enforce stable sorting:
            if order_by is None:
                order_by = [self.alias.c._id.asc()]

        assert order_by is not None

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = select(selects, conditions, [], order_by=order_by,
                           distinct=distinct, limit=qlimit, offset=qoffset)
            rp = self.bind.execute(query)
            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                yield self.unpack_entry(row)

    def __repr__(self):
        return "<FactTable(%r)>" % (self.dataset)
Beispiel #40
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []

    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):

        super(SQLTable, self).__init__()

        self.sa_table = None
        self.sa_metadata = None

        self.name = name
        self.connection = connection

        self.label = label if label else name

        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):

        if (column.type == "Integer"):
            return Integer
        elif (column.type == "String"):
            #if (column.length is None): column.length = 128
            return Unicode(length=128)
        elif (column.type == "Float"):
            return Float
        elif (column.type == "Boolean"):
            return Boolean
        elif (column.type == "AutoIncrement"):
            return Integer
        elif (column.type == "Date"):
            return Date
        elif (column.type == "Time"):
            return Time
        elif (column.type == "DateTime"):
            return DateTime
        elif (column.type == "Binary"):
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" %
                            (column, column.type))

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info(
                    "SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                    (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info(
                "SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warning(
                "SQLTable %s found %d warnings assigning non-unicode fields to unicode columns"
                % (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            column.sqltable = self

            # Check for duplicate names
            if (column.name in columns_ex):
                raise ETLConfigurationException(
                    "Duplicate column name '%s' in %s" % (column.name, self))

            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning(
                        "Column %s foreign key %s table (%s) has not been defined in backend (ignoring).",
                        column, column.fk_sqlcolumn,
                        column.fk_sqlcolumn.sqltable)
                    continue

                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           ForeignKey(
                               column.fk_sqlcolumn.sqltable.sa_table.columns[
                                   column.fk_sqlcolumn.name]),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type
                                          == "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type
                                          == "AutoIncrement" else False)))

        # Check schema:

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend?  (unsafe, allow read-only connections and make them default?)
        # TODO:? Delete columns (unsafe, allow read-only connections and make them default?)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """

        #if (self._pk == False):
        if True:
            pk_cols = []
            for col in self.columns:
                if col.pk:
                    pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" %
                                (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            #print column
            d[column.name] = getattr(row, column.name)

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxy objects from SQLAlchemy
            yield self._rowtodict(r)

    def lookup(self, ctx, attribs, find_function=None):

        logger.debug("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception(
                "Cannot lookup on table '%s' with no criteria (empty attribute set)"
                % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception(
                "Found more than one row when searching for just one in table %s: %s"
                % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" %
                     (self.name, attribs, row))
        return row
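
    # Hedged usage sketch (attribute names are illustrative, not from the
    # source): given a SQLTable with a 'code' column,
    #
    #   row = table.lookup(ctx, {"code": "ABC"})
    #
    # returns the matching row as a plain dict, None when nothing matches,
    # and raises when the criteria match more than one row.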

    def upsert(self, ctx, data, keys=[]):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception(
                        "Could not find attribute '%s' in data when storing row data: %s"
                        % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if (isinstance(v1, str) or isinstance(v2, str)):
                            if (not isinstance(v1, str)): v1 = str(v1)
                            if (not isinstance(v2, str)): v2 = str(v2)
                        if (v1 != v2):
                            if (c.name not in self._lookup_changed_fields):
                                logger.warning(
                                    "%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)"
                                    % (self, c.name, v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id
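
    # Hedged usage sketch of the lookup-then-update-or-insert flow above
    # (ctx and the column names are illustrative, not from the source):
    #
    #   table.upsert(ctx, {"id": 1, "name": "Alice"}, keys=["id"])
    #
    # If a row with id=1 exists it is updated, with a warning logged for any
    # field whose value changed; otherwise the row is inserted and returned
    # with its generated primary key filled in.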

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception(
                        "Missing attribute for column %s in table '%s' while inserting row: %s"
                        % (e, self.name, data))

                # Checks
                if (column.type == "String") and (not isinstance(
                        row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warning(
                            "Unicode column %r received non-unicode string: %r "
                            % (column.name, row[column.name]))

        return row

    def insert(self, ctx, data):

        row = self._prepare_row(ctx, data)

        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1

        return row  # when the table has a PK, it now includes the new PK value

    def update(self, ctx, data, keys=[]):

        row = self._prepare_row(ctx, data)

        # Automatically calculate lookup if necessary
        pk = self.pk(ctx)  # resolved up front; also decides the return value
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception(
                        "Could not find attribute '%s' in data when storing row data: %s"
                        % (key, data))
        else:
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(
            self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1

        if pk is not None:
            return row
        else:
            return None
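
# A minimal, self-contained sketch of the pattern SQLTable.initialize() uses
# above: start from an empty Table, append Column objects one by one, then
# emit CREATE TABLE. The engine and column names are stand-ins, not from the
# source.
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, Unicode

engine = create_engine("sqlite://")  # throwaway in-memory database
metadata = MetaData()
example = Table("example", metadata)
example.append_column(
    Column("id", Integer, primary_key=True, autoincrement=True))
example.append_column(Column("name", Unicode(length=128), nullable=False))
example.create(engine)  # generates and executes the CREATE TABLE statement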
Beispiel #41
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []

    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):

        super(SQLTable, self).__init__()

        self.sa_table = None
        self.sa_metadata = None

        self.name = name
        self.connection = connection

        self.label = label if label else name

        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):

        if (column.type == "Integer"):
            return Integer
        elif (column.type == "String"):
            #if (column.length is None): column.length = 128
            return Unicode(length = 128)
        elif (column.type == "Float"):
            return Float
        elif (column.type == "Boolean"):
            return Boolean
        elif (column.type == "AutoIncrement"):
            return Integer
        elif (column.type == "Date"):
            return Date
        elif (column.type == "Time"):
            return Time
        elif (column.type == "DateTime"):
            return DateTime
        elif (column.type == "Binary"):
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" % (column, column.type))

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                            (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warning("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" %
                           (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            column.sqltable = self

            # Check for duplicate names
            if (column.name in columns_ex):
                raise ETLConfigurationException("Duplicate column name '%s' in %s" % (column.name, self))

            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning("Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable)
                    continue

                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[column.fk_sqlcolumn.name]),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))

        # Check schema:

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend?  (unsafe, allow read-only connections and make them default?)
        # TODO:? Delete columns (unsafe, allow read-only connections and make them default?)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """

        #if (self._pk == False):
        if True:
            pk_cols = []
            for col in self.columns:
                if col.pk:
                    pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            #print column
            d[column.name] = getattr(row, column.name)

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxy objects from SQLAlchemy
            yield self._rowtodict(r)


    def lookup(self, ctx, attribs, find_function=None):

        logger.debug("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys = []):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if (isinstance(v1, str) or isinstance(v2, str)):
                            if (not isinstance(v1, str)): v1 = str(v1)
                            if (not isinstance(v2, str)): v2 = str(v2)
                        if (v1 != v2):
                            if (c.name not in self._lookup_changed_fields):
                                logger.warning("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))

                # Checks
                if (column.type == "String") and (not isinstance(row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column.name, row[column.name]))

        return row

    def insert(self, ctx, data):

        row = self._prepare_row(ctx, data)

        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1

        return row  # when the table has a PK, it now includes the new PK value

    def update(self, ctx, data, keys = []):

        row = self._prepare_row(ctx, data)

        # Automatically calculate lookup if necessary
        pk = self.pk(ctx)  # resolved up front; also decides the return value
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1

        if pk is not None:
            return row
        else:
            return None
Beispiel #42
0
def create_translation_table(_table_name, foreign_class, relation_name,
    language_class, relation_lazy='select', **kwargs):
    """Creates a table that represents some kind of data attached to the given
    foreign class, but translated across several languages.  Returns the new
    table's mapped class.  It won't be declarative, but it will have a
    `__table__` attribute so you can retrieve the Table object.

    `foreign_class` must have a `__singlename__`, currently only used to create
    the name of the foreign key column.

    Also supports the notion of a default language, which is attached to the
    session.  This is English by default, for historical and practical reasons.

    Usage looks like this:

        class Foo(Base): ...

        create_translation_table('foo_bars', Foo, 'bars',
            name = Column(...),
        )

        # Now you can do the following:
        foo.name
        foo.name_map['en']
        foo.foo_bars['en']

        foo.name_map['en'] = "new name"
        del foo.name_map['en']

        q.options(joinedload(Foo.bars_local))
        q.options(joinedload(Foo.bars))

    The following properties are added to the passed class:

    - `(relation_name)`, a relation to the new table.  It uses a dict-based
      collection class, where the keys are language identifiers and the values
      are rows in the created tables.
    - `(relation_name)_local`, a relation to the row in the new table that
      matches the current default language.
    - `(relation_name)_table`, the class created by this function.

    Note that these are distinct relations.  Even though the former necessarily
    includes the latter, SQLAlchemy doesn't treat them as linked; loading one
    will not load the other.  Modifying both within the same transaction has
    undefined behavior.

    For each column provided, the following additional attributes are added to
    Foo:

    - `(column)_map`, an association proxy onto `foo_bars`.
    - `(column)`, an association proxy onto `foo_bars_local`.

    Pardon the naming disparity, but the grammar suffers otherwise.

    Modifying these directly is not likely to be a good idea.

    For Markdown-formatted columns, `(column)_map` and `(column)` will give
    Markdown objects.
    """
    # n.b.: language_class only exists for the sake of tests, which sometimes
    # want to create tables entirely separate from the pokedex metadata

    foreign_key_name = foreign_class.__singlename__ + '_id'

    Translations = type(_table_name, (object,), {
        '_language_identifier': association_proxy('local_language', 'identifier'),
        'relation_name': relation_name,
        '__tablename__': _table_name,
    })

    # Create the table object
    table = Table(_table_name, foreign_class.__table__.metadata,
        Column(foreign_key_name, Integer, ForeignKey(foreign_class.id),
            primary_key=True, nullable=False,
            doc=u"ID of the %s these texts relate to" % foreign_class.__singlename__),
        Column('local_language_id', Integer, ForeignKey(language_class.id),
            primary_key=True, nullable=False,
            doc=u"Language these texts are in"),
    )
    Translations.__table__ = table

    # Add ye columns
    # Column objects have a _creation_order attribute in ascending order; use
    # this to get the (unordered) kwargs sorted correctly
    kwitems = list(kwargs.items())
    kwitems.sort(key=lambda kv: kv[1]._creation_order)
    for name, column in kwitems:
        column.name = name
        table.append_column(column)

    # Construct ye mapper
    mapper(Translations, table, properties={
        'foreign_id': synonym(foreign_key_name),
        'local_language': relationship(language_class,
            primaryjoin=table.c.local_language_id == language_class.id,
            innerjoin=True),
    })

    # Add full-table relations to the original class
    # Foo.bars_table
    setattr(foreign_class, relation_name + '_table', Translations)
    # Foo.bars
    setattr(foreign_class, relation_name, relationship(Translations,
        primaryjoin=foreign_class.id == Translations.foreign_id,
        collection_class=attribute_mapped_collection('local_language'),
    ))
    # Foo.bars_local
    # This is a bit clever; it uses bindparam() to make the join clause
    # modifiable on the fly.  db sessions know the current language and
    # populate the bindparam.
    # The 'dummy' value is to trick SQLA; without it, SQLA thinks this
    # bindparam is just its own auto-generated clause and everything gets
    # f****d up.
    local_relation_name = relation_name + '_local'
    setattr(foreign_class, local_relation_name, relationship(Translations,
        primaryjoin=and_(
            Translations.foreign_id == foreign_class.id,
            Translations.local_language_id == bindparam('_default_language_id',
                value='dummy', type_=Integer, required=True),
        ),
        foreign_keys=[Translations.foreign_id, Translations.local_language_id],
        uselist=False,
        lazy=relation_lazy,
    ))

    # Add per-column proxies to the original class
    for name, column in kwitems:
        getset_factory = None
        string_getter = column.info.get('string_getter')
        if string_getter:
            getset_factory = _getset_factory_factory(
                column.name, string_getter)

        # Class.(column) -- accessor for the default language's value
        setattr(foreign_class, name,
            LocalAssociationProxy(local_relation_name, name,
                    getset_factory=getset_factory))

        # Class.(column)_map -- accessor for the language dict
        # Need a custom creator since Translations doesn't have an init, and
        # these are passed as *args anyway
        def creator(language, value):
            row = Translations()
            row.local_language = language
            setattr(row, name, value)
            return row
        setattr(foreign_class, name + '_map',
            association_proxy(relation_name, name, creator=creator,
                    getset_factory=getset_factory))

    # Add to the list of translation classes
    foreign_class.translation_classes.append(Translations)

    # Done
    return Translations
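
# Sketch of the kwargs-ordering trick used above: each Column records a global
# _creation_order counter when it is constructed, so keyword-supplied columns
# can be re-sorted into declaration order even though **kwargs does not
# guarantee it. The column names here are made up for illustration.
from sqlalchemy import Column, Unicode

cols = {"flavor": Column(Unicode(16)), "name": Column(Unicode(16))}
ordered = sorted(cols.items(), key=lambda kv: kv[1]._creation_order)
# 'ordered' now lists the (key, Column) pairs in construction order
# ('flavor' before 'name'), regardless of dict iteration order.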
Beispiel #43
0
def process_progress_notes(table: Table, engine: Engine,
                           progargs: Any) -> None:
    crate_col_max_subnum = Column(CRATE_COL_MAX_SUBNUM, Integer, nullable=True)
    crate_col_last_note = Column(CRATE_COL_LAST_NOTE, Integer, nullable=True)
    table.append_column(crate_col_max_subnum)
    table.append_column(crate_col_last_note)
    add_columns(engine, table, [crate_col_max_subnum, crate_col_last_note])
    # We're always in "RiO land", not "RCEP land", for this one.
    add_indexes(engine, table, [
        {  # Joint index, for JOIN in UPDATE statement below
            'index_name': CRATE_IDX_RIONUM_NOTENUM,
            'column': '{rio_number}, NoteNum'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_MAX_SUBNUM,
            'column': CRATE_COL_MAX_SUBNUM,
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_LAST_NOTE,
            'column': CRATE_COL_LAST_NOTE,
        },
    ])

    ensure_columns_present(
        engine,
        tablename=table.name,
        column_names=["NoteNum", "SubNum", "EnteredInError", "EnteredInError"])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[
                                   CRATE_COL_MAX_SUBNUM, CRATE_COL_LAST_NOTE,
                                   CRATE_COL_RIO_NUMBER
                               ])

    # Find the maximum SubNum for each note, and store it.
    # Slow query, even with index.
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_SUBNUM)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_subnum_col} = subq.max_subnum
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, NoteNum, MAX(SubNum) AS max_subnum
            FROM {tablename} p2
            GROUP BY {rio_number}, NoteNum
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.NoteNum = p1.NoteNum
        WHERE p1.{max_subnum_col} IS NULL
    """.format(
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single column accordingly
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_NOTE)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_note_col} =
                CASE
                    WHEN SubNum = {max_subnum_col} THEN 1
                    ELSE 0
                END
        WHERE {last_note_col} IS NULL
    """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
        ))

    # Create a view, if we're on an RCEP database
    if progargs.rcep and progargs.cpft:
        select_sql = """
            SELECT *
            FROM {tablename}
            WHERE
                (EnteredInError <> 1 OR EnteredInError IS NULL)
                AND {last_note_col} = 1
        """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
        )
        create_view(engine, VIEW_RCEP_CPFT_PROGRESS_NOTES_CURRENT, select_sql)
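
# The two UPDATE statements above implement the classic greatest-n-per-group
# pattern in SQL Server's UPDATE ... FROM syntax. A rough dialect-neutral
# equivalent with a correlated subquery would be (illustrative names only):
#
#   UPDATE notes SET crate_max_subnum = (
#       SELECT MAX(SubNum) FROM notes AS p2
#       WHERE p2.crate_rio_number = notes.crate_rio_number
#         AND p2.NoteNum = notes.NoteNum
#   )
#   WHERE crate_max_subnum IS NULL;
#
# The WHERE ... IS NULL guards are what make both steps safe to re-run.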
Beispiel #44
0
def create_translation_table(_table_name, foreign_class, relation_name,
    language_class, relation_lazy='select', **kwargs):
    """Creates a table that represents some kind of data attached to the given
    foreign class, but translated across several languages.  Returns the new
    table's mapped class.  It won't be declarative, but it will have a
    `__table__` attribute so you can retrieve the Table object.

    `foreign_class` must have a `__singlename__`, currently only used to create
    the name of the foreign key column.

    Also supports the notion of a default language, which is attached to the
    session.  This is English by default, for historical and practical reasons.

    Usage looks like this:

        class Foo(Base): ...

        create_translation_table('foo_bars', Foo, 'bars',
            name = Column(...),
        )

        # Now you can do the following:
        foo.name
        foo.name_map['en']
        foo.foo_bars['en']

        foo.name_map['en'] = "new name"
        del foo.name_map['en']

        q.options(joinedload(Foo.bars_local))
        q.options(joinedload(Foo.bars))

    The following properties are added to the passed class:

    - `(relation_name)`, a relation to the new table.  It uses a dict-based
      collection class, where the keys are language identifiers and the values
      are rows in the created tables.
    - `(relation_name)_local`, a relation to the row in the new table that
      matches the current default language.
    - `(relation_name)_table`, the class created by this function.

    Note that these are distinct relations.  Even though the former necessarily
    includes the latter, SQLAlchemy doesn't treat them as linked; loading one
    will not load the other.  Modifying both within the same transaction has
    undefined behavior.

    For each column provided, the following additional attributes are added to
    Foo:

    - `(column)_map`, an association proxy onto `foo_bars`.
    - `(column)`, an association proxy onto `foo_bars_local`.

    Pardon the naming disparity, but the grammar suffers otherwise.

    Modifying these directly is not likely to be a good idea.

    For Markdown-formatted columns, `(column)_map` and `(column)` will give
    Markdown objects.
    """
    # n.b.: language_class only exists for the sake of tests, which sometimes
    # want to create tables entirely separate from the pokedex metadata

    foreign_key_name = foreign_class.__singlename__ + '_id'

    Translations = type(_table_name, (object,), {
        '_language_identifier': association_proxy('local_language', 'identifier'),
        'relation_name': relation_name,
    })

    # Create the table object
    table = Table(_table_name, foreign_class.__table__.metadata,
        Column(foreign_key_name, Integer, ForeignKey(foreign_class.id),
            primary_key=True, nullable=False,
            info=dict(description="ID of the %s these texts relate to" % foreign_class.__singlename__)),
        Column('local_language_id', Integer, ForeignKey(language_class.id),
            primary_key=True, nullable=False,
            info=dict(description="Language these texts are in")),
    )
    Translations.__table__ = table

    # Add ye columns
    # Column objects have a _creation_order attribute in ascending order; use
    # this to get the (unordered) kwargs sorted correctly
    kwitems = list(kwargs.items())
    kwitems.sort(key=lambda kv: kv[1]._creation_order)
    for name, column in kwitems:
        column.name = name
        table.append_column(column)

    # Construct ye mapper
    mapper(Translations, table, properties={
        'foreign_id': synonym(foreign_key_name),
        'local_language': relationship(language_class,
            primaryjoin=table.c.local_language_id == language_class.id,
            innerjoin=True),
    })

    # Add full-table relations to the original class
    # Foo.bars_table
    setattr(foreign_class, relation_name + '_table', Translations)
    # Foo.bars
    setattr(foreign_class, relation_name, relationship(Translations,
        primaryjoin=foreign_class.id == Translations.foreign_id,
        collection_class=attribute_mapped_collection('local_language'),
    ))
    # Foo.bars_local
    # This is a bit clever; it uses bindparam() to make the join clause
    # modifiable on the fly.  db sessions know the current language and
    # populate the bindparam.
    # The 'dummy' value is to trick SQLA; without it, SQLA thinks this
    # bindparam is just its own auto-generated clause and everything gets
    # f****d up.
    local_relation_name = relation_name + '_local'
    setattr(foreign_class, local_relation_name, relationship(Translations,
        primaryjoin=and_(
            Translations.foreign_id == foreign_class.id,
            Translations.local_language_id == bindparam('_default_language_id',
                value='dummy', type_=Integer, required=True),
        ),
        foreign_keys=[Translations.foreign_id, Translations.local_language_id],
        uselist=False,
        #innerjoin=True,
        lazy=relation_lazy,
    ))

    # Add per-column proxies to the original class
    for name, column in kwitems:
        getset_factory = None
        string_getter = column.info.get('string_getter')
        if string_getter:
            getset_factory = _getset_factory_factory(
                column.name, string_getter)

        # Class.(column) -- accessor for the default language's value
        setattr(foreign_class, name,
            LocalAssociationProxy(local_relation_name, name,
                    getset_factory=getset_factory))

        # Class.(column)_map -- accessor for the language dict
        # Need a custom creator since Translations doesn't have an init, and
        # these are passed as *args anyway
        def creator(language, value):
            row = Translations()
            row.local_language = language
            setattr(row, name, value)
            return row
        setattr(foreign_class, name + '_map',
            association_proxy(relation_name, name, creator=creator,
                    getset_factory=getset_factory))

    # Add to the list of translation classes
    foreign_class.translation_classes.append(Translations)

    # Done
    return Translations
Beispiel #45
0
def process_patient_table(table: Table, engine: Engine, progargs: Any) -> None:
    log.info("Preprocessing patient table: {}".format(repr(table.name)))
    rio_type = table_is_rio_type(table.name, progargs)
    if rio_type:
        pk_col = get_effective_int_pk_col(table)
        rio_pk = pk_col if pk_col != CRATE_COL_PK else None
        string_pt_id = get_rio_patient_id_col(table)
        required_cols = [string_pt_id]
    else:  # RCEP type
        rio_pk = None
        required_cols = [RCEP_COL_PATIENT_ID]
        string_pt_id = RCEP_COL_PATIENT_ID
    if not progargs.print:
        required_cols.extend([CRATE_COL_PK, CRATE_COL_RIO_NUMBER])

    # -------------------------------------------------------------------------
    # Add pk and rio_number columns, if not present
    # -------------------------------------------------------------------------
    if rio_type and rio_pk is not None:
        crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
        # ... can't do NOT NULL; need to populate it
        required_cols.append(rio_pk)
    else:  # RCEP type, or no PK in RiO
        crate_pk_col = make_bigint_autoincrement_column(
            CRATE_COL_PK, engine.dialect)
        # ... autopopulates
    crate_rio_number_col = Column(CRATE_COL_RIO_NUMBER,
                                  BigInteger,
                                  nullable=True)
    # ... even if RiO numbers are INT, they come from VARCHAR(15) here, and
    # that can (and does) look numeric and overflow an INT.
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(crate_pk_col)
    table.append_column(crate_rio_number_col)
    add_columns(engine, table, [crate_pk_col, crate_rio_number_col])

    # -------------------------------------------------------------------------
    # Update pk and rio_number values, if not NULL
    # -------------------------------------------------------------------------
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)
    cast_id_to_int = sql_fragment_cast_to_int(string_pt_id,
                                              dialect=engine.dialect)
    if rio_type and rio_pk:
        log.info("Table {}: updating columns {} and {}".format(
            repr(table.name), repr(CRATE_COL_PK), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine, """
            UPDATE {tablename} SET
                {crate_pk} = {rio_pk},
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_pk} IS NULL
                OR {crate_rio_number} IS NULL
        """.format(
                tablename=table.name,
                crate_pk=CRATE_COL_PK,
                rio_pk=rio_pk,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    else:
        # RCEP format, or RiO with no PK
        # crate_pk is autogenerated as an INT IDENTITY field
        log.info("Table {}: updating column {}".format(
            repr(table.name), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine,
            """
            UPDATE {tablename} SET
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_rio_number} IS NULL
        """.format(  # noqa
                tablename=table.name,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    # -------------------------------------------------------------------------
    # Add indexes, if absent
    # -------------------------------------------------------------------------
    # Note that the indexes are unlikely to speed up the WHERE NOT NULL search
    # above, so it doesn't matter that we add these last. Their use is for
    # the subsequent CRATE anonymisation table scans.
    add_indexes(engine, table, [
        {
            'index_name': CRATE_IDX_PK,
            'column': CRATE_COL_PK,
            'unique': True,
        },
        {
            'index_name': CRATE_IDX_RIONUM,
            'column': CRATE_COL_RIO_NUMBER,
        },
    ])
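
# Standalone sketch of the add-then-backfill idiom used above: ALTER the table
# to add a new nullable column, then populate it only where it is still NULL,
# which keeps the step idempotent across re-runs. Table and column names are
# stand-ins, not from the source.
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE patient (rio_id VARCHAR(15))"))
    conn.execute(text("INSERT INTO patient (rio_id) VALUES ('12345')"))
    conn.execute(text("ALTER TABLE patient ADD COLUMN crate_rio_number BIGINT"))
    conn.execute(text(
        "UPDATE patient SET crate_rio_number = CAST(rio_id AS BIGINT) "
        "WHERE crate_rio_number IS NULL"))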
Beispiel #46
0
class FactTable(object):
    """ The ``FactTable`` serves as a controller object for
    a given ``Model``, handling the creation, filling and migration
    of the table schema associated with the dataset. """
    def __init__(self, dataset):
        self.dataset = dataset
        self.bind = db.engine
        self.meta = MetaData()
        self.meta.bind = self.bind
        self._table = None

    @property
    def table(self):
        """ Generate an appropriate table representation to mirror the
        fields known for this table. """
        if self._table is None:
            name = '%s__facts' % self.dataset.name
            self._table = Table(name, self.meta)
            id_col = Column('_id', Unicode(42), primary_key=True)
            self._table.append_column(id_col)
            json_col = Column('_json', Unicode())
            self._table.append_column(json_col)
            self._fields_columns(self._table)
        return self._table

    @property
    def alias(self):
        """ An alias used for queries. """
        if not hasattr(self, '_alias'):
            self._alias = self.table.alias('entry')
        return self._alias

    @property
    def mapping(self):
        if not hasattr(self, '_mapping'):
            self._mapping = {}
            for attribute in self.dataset.model.attributes:
                if attribute.column in self.alias.columns:
                    col = self.alias.c[attribute.column]
                    self._mapping[attribute.path] = col
        return self._mapping

    @property
    def exists(self):
        return db.engine.has_table(self.table.name)

    def _fields_columns(self, table):
        """ Transform the (auto-detected) fields into a set of column
        specifications. """
        for field in self.dataset.fields:
            data_type = TYPES.get(field.get('type'), Unicode)
            col = Column(field.get('name'), data_type, nullable=True)
            table.append_column(col)

    def load_iter(self, iterable, chunk_size=1000):
        """ Bulk load all the data in an artifact to a matching database
        table. """
        chunk = []

        conn = self.bind.connect()
        tx = conn.begin()
        try:
            for i, record in enumerate(iterable):
                chunk.append(self._expand_record(i, record))
                if len(chunk) >= chunk_size:
                    stmt = self.table.insert()
                    conn.execute(stmt, chunk)
                    chunk = []

            if len(chunk):
                stmt = self.table.insert()
                conn.execute(stmt, chunk)
            tx.commit()
        except:
            tx.rollback()
            raise

    def _expand_record(self, i, record):
        """ Transform an incoming record into a form that matches the
        fields schema. """
        record['_id'] = i
        record['_json'] = json.dumps(record, default=json_default)
        return record

    def unpack_entry(self, row):
        """ Convert a database-returned row into a nested and mapped
        fact representation. """
        row = dict(row.items())
        result = {'id': row.get('_id')}
        for dimension in self.dataset.model.dimensions:
            value = {}
            for attr in dimension.attributes:
                value[attr.name] = row.get(attr.column)
            result[dimension.name] = value
        for measure in self.dataset.model.measures:
            result[measure.name] = row.get(measure.column)
        return result

    def create(self):
        """ Create the fact table if it does not exist. """
        if not self.exists:
            self.table.create(self.bind)

    def drop(self):
        """ Drop the fact table if it does exist. """
        if self.exists:
            self.table.drop()
        self._table = None

    def num_entries(self):
        """ Get the number of facts that are currently loaded. """
        if not self.exists:
            return 0
        rp = self.bind.execute(self.table.count())
        return rp.fetchone()[0]

    def _dimension_columns(self, dimension):
        """ Filter the generated columns for those related to a
        particular dimension. """
        prefix = dimension.name + '.'
        columns = []
        for path, col in self.mapping.items():
            if path.startswith(prefix):
                columns.append(col)
        return columns

    def num_members(self, dimension):
        """ Get the number of members for the given dimension. """
        if not self.exists:
            return 0
        q = select(self._dimension_columns(dimension), distinct=True)
        rp = self.bind.execute(q.alias('counted').count())
        return rp.fetchone()[0]

    def dimension_members(self,
                          dimension,
                          conditions="1=1",
                          offset=0,
                          limit=None):
        selects = self._dimension_columns(dimension)
        order_by = [s.asc() for s in selects]
        for entry in self.entries(conditions=conditions,
                                  order_by=order_by,
                                  selects=selects,
                                  distinct=True,
                                  offset=offset,
                                  limit=limit):
            yield entry.get(dimension.name)

    def entries(self,
                conditions="1=1",
                order_by=None,
                limit=None,
                selects=[],
                distinct=False,
                offset=0,
                step=10000):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes. """
        if not self.exists:
            return

        if not selects:
            selects = [self.alias.c._id] + list(self.mapping.values())

            # enforce stable sorting:
            if order_by is None:
                order_by = [self.alias.c._id.asc()]

        assert order_by is not None

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = select(selects,
                           conditions, [],
                           order_by=order_by,
                           distinct=distinct,
                           limit=qlimit,
                           offset=qoffset)
            rp = self.bind.execute(query)
            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                yield self.unpack_entry(row)

    def __repr__(self):
        return "<FactTable(%r)>" % (self.dataset)
Beispiel #47
0
def process_clindocs_table(table: Table, engine: Engine,
                           progargs: Any) -> None:
    # For RiO only, not RCEP
    crate_col_max_docver = Column(CRATE_COL_MAX_DOCVER, Integer, nullable=True)
    crate_col_last_doc = Column(CRATE_COL_LAST_DOC, Integer, nullable=True)
    table.append_column(crate_col_max_docver)
    table.append_column(crate_col_last_doc)
    add_columns(engine, table, [crate_col_max_docver, crate_col_last_doc])
    add_indexes(engine, table, [
        {
            'index_name':
            CRATE_IDX_RIONUM_SERIALNUM,
            'column':
            '{rio_number}, SerialNumber'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {
            'index_name': CRATE_IDX_MAX_DOCVER,
            'column': CRATE_COL_MAX_DOCVER,
        },
        {
            'index_name': CRATE_IDX_LAST_DOC,
            'column': CRATE_COL_LAST_DOC,
        },
    ])

    required_cols = ["SerialNumber", "RevisionID"]
    if not progargs.print:
        required_cols.extend(
            [CRATE_COL_MAX_DOCVER, CRATE_COL_LAST_DOC, CRATE_COL_RIO_NUMBER])
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)

    # Find the maximum SerialNumber for each note, and store it.
    # Slow query, even with index.
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_DOCVER)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_docver_col} = subq.max_docver
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, SerialNumber, MAX(RevisionID) AS max_docver
            FROM {tablename} p2
            GROUP BY {rio_number}, SerialNumber
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.SerialNumber = p1.SerialNumber
        WHERE p1.{max_docver_col} IS NULL
    """.format(
            max_docver_col=CRATE_COL_MAX_DOCVER,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single column accordingly
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_DOC)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_doc_col} =
                CASE
                    WHEN RevisionID = {max_docver_col} THEN 1
                    ELSE 0
                END
        WHERE {last_doc_col} IS NULL
    """.format(
            tablename=table.name,
            last_doc_col=CRATE_COL_LAST_DOC,
            max_docver_col=CRATE_COL_MAX_DOCVER,
        ))
Beispiel #48
0
class Table(object):
    """Represents a table in a database and exposes common operations."""
    PRIMARY_DEFAULT = 'id'

    def __init__(self,
                 database,
                 table_name,
                 primary_id=None,
                 primary_type=None,
                 auto_create=False):
        """Initialise the table from database schema."""
        self.db = database
        self.name = normalize_table_name(table_name)
        self._table = None
        self._indexes = []
        self._primary_id = primary_id
        self._primary_type = primary_type
        self._auto_create = auto_create

    @property
    def exists(self):
        """Check to see if the table currently exists in the database."""
        if self._table is not None:
            return True
        return self.name in self.db

    @property
    def table(self):
        """Get a reference to the table, which may be reflected or created."""
        if self._table is None:
            self._sync_table(())
        return self._table

    @property
    def columns(self):
        """Get a listing of all columns that exist in the table."""
        if not self.exists:
            return []
        return self.table.columns.keys()

    def has_column(self, column):
        """Check if a column with the given name exists on this table."""
        return normalize_column_name(column) in self.columns

    def insert(self, row, ensure=None, types=None):
        """Add a ``row`` dict by inserting it into the table.

        If ``ensure`` is set, any keys of the row that are not
        table columns will be created automatically.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(title='I am a banana!')
            table.insert(data)

        Returns the inserted row's primary key.
        """
        row = self._sync_columns(row, ensure, types=types)
        res = self.db.executable.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]
        return True

    def insert_ignore(self, row, keys, ensure=None, types=None):
        """Add a ``row`` dict into the table if the row does not exist.

        If rows with matching ``keys`` already exist, nothing is inserted.

        Setting ``ensure`` results in automatically creating missing columns,
        i.e., keys of the row that are not table columns.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(id=10, title='I am a banana!')
            table.insert_ignore(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        args, _ = self._keys_to_args(row, keys)
        if self.count(**args) == 0:
            return self.insert(row, ensure=False)
        return False

    def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
        """Add many rows at a time.

        This is significantly faster than adding them one by one. Per default
        the rows are processed in chunks of 1000 per commit, unless you specify
        a different ``chunk_size``.

        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::

            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """
        chunk = []
        for row in rows:
            row = self._sync_columns(row, ensure, types=types)
            chunk.append(row)
            if len(chunk) == chunk_size:
                self.table.insert().execute(chunk)
                chunk = []

        if len(chunk):
            self.table.insert().execute(chunk)

    def update(self, row, keys, ensure=None, types=None, return_count=False):
        """Update a row in the table.

        The update is managed via the set of column names stated in ``keys``:
        they will be used as filters for the data to be updated, using the
        values in ``row``.
        ::

            # update all entries with id matching 10, setting their title columns
            data = dict(id=10, title='I am a banana!')
            table.update(data, ['id'])

        If keys in ``row`` update columns not present in the table, they will
        be created based on the settings of ``ensure`` and ``types``, matching
        the behavior of :py:meth:`insert() <dataset.Table.insert>`.
        """
        row = self._sync_columns(row, ensure, types=types)
        args, row = self._keys_to_args(row, keys)
        clause = self._args_to_clause(args)
        if not len(row):
            return self.count(clause)
        stmt = self.table.update(whereclause=clause, values=row)
        rp = self.db.executable.execute(stmt)
        if rp.supports_sane_rowcount():
            return rp.rowcount
        if return_count:
            return self.count(clause)

    def upsert(self, row, keys, ensure=None, types=None):
        """An UPSERT is a smart combination of insert and update.

        If rows with matching ``keys`` exist they will be updated, otherwise a
        new row is inserted in the table.
        ::

            data = dict(id=10, title='I am a banana!')
            table.upsert(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        row_count = self.update(row, keys, ensure=False, return_count=True)
        if row_count == 0:
            return self.insert(row, ensure=False)
        return True

    def delete(self, *clauses, **filters):
        """Delete rows from the table.

        Keyword arguments can be used to add column-based filters. The filter
        criterion will always be equality:
        ::

            table.delete(place='Berlin')

        If no arguments are given, all records are deleted.
        """
        if not self.exists:
            return False
        clause = self._args_to_clause(filters, clauses=clauses)
        stmt = self.table.delete(whereclause=clause)
        rp = self.db.executable.execute(stmt)
        return rp.rowcount > 0

    def _reflect_table(self):
        """Load the tables definition from the database."""
        with self.db.lock:
            try:
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema,
                                        autoload=True)
            except NoSuchTableError:
                pass

    def _threading_warn(self):
        if self.db.in_transaction and threading.active_count() > 1:
            warnings.warn(
                "Changing the database schema inside a transaction "
                "in a multi-threaded environment is likely to lead "
                "to race conditions and synchronization issues.",
                RuntimeWarning)

    def _sync_table(self, columns):
        """Lazy load, create or adapt the table structure in the database."""
        if self._table is None:
            # Load an existing table from the database.
            self._reflect_table()
        if self._table is None:
            # Create the table with an initial set of columns.
            if not self._auto_create:
                raise DatasetException("Table does not exist: %s" % self.name)
            # Keep the lock scope small because this is run very often.
            with self.db.lock:
                self._threading_warn()
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema)
                if self._primary_id is not False:
                    # This can go wrong on DBMSs like MySQL and SQLite,
                    # which do not allow tables without any columns.
                    primary_id = self._primary_id or self.PRIMARY_DEFAULT
                    primary_type = self._primary_type or Types.integer
                    increment = primary_type in [Types.integer, Types.bigint]
                    column = Column(primary_id,
                                    primary_type,
                                    primary_key=True,
                                    autoincrement=increment)
                    self._table.append_column(column)
                for column in columns:
                    if not column.name == self._primary_id:
                        self._table.append_column(column)
                self._table.create(self.db.executable, checkfirst=True)
        elif len(columns):
            with self.db.lock:
                self._threading_warn()
                for column in columns:
                    if not self.has_column(column.name):
                        self.db.op.add_column(self.name, column,
                                              self.db.schema)
                self._reflect_table()

    def _sync_columns(self, row, ensure, types=None):
        """Create missing columns (or the table) prior to writes.

        If automatic schema generation is disabled (``ensure`` is ``False``),
        this will remove any keys from the ``row`` for which there is no
        matching column.
        """
        columns = self.columns
        ensure = self._check_ensure(ensure)
        types = types or {}
        types = {normalize_column_name(k): v for (k, v) in types.items()}
        out = {}
        sync_columns = []
        for name, value in row.items():
            name = normalize_column_name(name)
            if ensure and name not in columns:
                _type = types.get(name)
                if _type is None:
                    _type = self.db.types.guess(value)
                sync_columns.append(Column(name, _type))
                columns.append(name)
            if name in columns:
                out[name] = value
        self._sync_table(sync_columns)
        return out
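
    # Behaviour sketch (illustrative session): with schema generation
    # enabled, writing a row with an unknown key adds the column first,
    # guessing its type from the value:
    #
    #     table.insert({'name': 'Ada', 'score': 4.2})
    #     # -> creates a float-typed 'score' column before the INSERT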

    def _check_ensure(self, ensure):
        if ensure is None:
            return self.db.ensure_schema
        return ensure

    def _args_to_clause(self, args, clauses=()):
        clauses = list(clauses)
        for column, value in args.items():
            if not self.has_column(column):
                clauses.append(false())
            elif isinstance(value, (list, tuple)):
                clauses.append(self.table.c[column].in_(value))
            else:
                clauses.append(self.table.c[column] == value)
        return and_(*clauses)
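
    # Filter-building sketch: list and tuple values become IN clauses,
    # and a filter on a column that does not exist yields false(), i.e. a
    # clause matching no rows:
    #
    #     table.find(id=[1, 2, 3])    # WHERE id IN (1, 2, 3)
    #     table.find(missing_col=1)   # WHERE false -> empty result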

    def _args_to_order_by(self, order_by):
        orderings = []
        for ordering in ensure_tuple(order_by):
            if ordering is None:
                continue
            column = ordering.lstrip('-')
            if column not in self.table.columns:
                continue
            if ordering.startswith('-'):
                orderings.append(self.table.c[column].desc())
            else:
                orderings.append(self.table.c[column].asc())
        return orderings

    def _keys_to_args(self, row, keys):
        keys = ensure_tuple(keys)
        keys = [normalize_column_name(k) for k in keys]
        row = row.copy()
        args = {k: row.pop(k) for k in keys if k in row}
        return args, row
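
    # Sketch: for row={'id': 10, 'title': 'x'} and keys=['id'] this returns
    # args={'id': 10} (the filter) and row={'title': 'x'} (the values that
    # remain to be written).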

    def create_column(self, name, type):
        """Create a new column ``name`` of a specified type.
        ::

            table.create_column('created_at', db.types.datetime)
        """
        name = normalize_column_name(name)
        if self.has_column(name):
            log.debug("Column exists: %s" % name)
            return
        self._sync_table((Column(name, type), ))

    def create_column_by_example(self, name, value):
        """
        Explicitly create a new column ``name`` with a type that is appropriate
        to store the given example ``value``.  The type is guessed in the same
        way as for the insert method with ``ensure=True``.
        ::

            table.create_column_by_example('length', 4.2)

        If a column of the same name already exists, no action is taken, even
        if it is not of the type we would have created.
        """
        type_ = self.db.types.guess(value)
        self.create_column(name, type_)

    def drop_column(self, name):
        """Drop the column ``name``.
        ::
            table.drop_column('created_at')
        """
        if self.db.engine.dialect.name == 'sqlite':
            raise RuntimeError("SQLite does not support dropping columns.")
        name = normalize_column_name(name)
        with self.db.lock:
            if not self.exists or not self.has_column(name):
                log.debug("Column does not exist: %s", name)
                return

            self._threading_warn()
            self.db.op.drop_column(self.table.name, name, self.table.schema)
            self._reflect_table()

    def drop(self):
        """Drop the table from the database.

        Deletes both the schema and all the contents within it.
        """
        with self.db.lock:
            if self.exists:
                self._threading_warn()
                self.table.drop(self.db.executable, checkfirst=True)
                self._table = None

    def has_index(self, columns):
        """Check if an index exists to cover the given ``columns``."""
        if not self.exists:
            return False
        columns = set([normalize_column_name(c) for c in columns])
        if columns in self._indexes:
            return True
        for column in columns:
            if not self.has_column(column):
                return False
        indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
        for index in indexes:
            if columns == set(index.get('column_names', [])):
                self._indexes.append(columns)
                return True
        return False

    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [normalize_column_name(c) for c in ensure_tuple(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            for column in columns:
                if not self.has_column(column):
                    return

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]
                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)
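
    # Usage sketch: extra keyword arguments are passed through to
    # SQLAlchemy's Index, so a unique index can be requested:
    #
    #     table.create_index(['email'], unique=True)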

    def find(self, *_clauses, **kwargs):
        """Perform a simple search on the table.

        Simply pass keyword arguments as ``filter``.
        ::

            results = table.find(country='France')
            results = table.find(country='France', year=1980)

        Using ``_limit``::

            # just return the first 10 rows
            results = table.find(country='France', _limit=10)

        You can sort the results by single or multiple columns. Append a minus
        sign to the column name for descending order::

            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (descending by year)
            results = table.find(order_by=['country', '-year'])

        To perform complex queries with advanced filters or to perform
        aggregation, use :py:meth:`db.query() <dataset.Database.query>`
        instead.
        """
        if not self.exists:
            return []

        _limit = kwargs.pop('_limit', None)
        _offset = kwargs.pop('_offset', 0)
        order_by = kwargs.pop('order_by', None)
        _streamed = kwargs.pop('_streamed', False)
        _step = kwargs.pop('_step', QUERY_STEP)
        if _step is False or _step == 0:
            _step = None

        order_by = self._args_to_order_by(order_by)
        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = self.table.select(whereclause=args,
                                  limit=_limit,
                                  offset=_offset)
        if len(order_by):
            query = query.order_by(*order_by)

        conn = self.db.executable
        if _streamed:
            conn = self.db.engine.connect()
            conn = conn.execution_options(stream_results=True)

        return ResultIter(conn.execute(query),
                          row_type=self.db.row_type,
                          step=_step)
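
    # Usage sketch for large tables (parameter names as defined above):
    # request a server-side cursor and a custom fetch step. ``process`` is
    # a hypothetical callback, not part of this module:
    #
    #     for row in table.find(_streamed=True, _step=5000):
    #         process(row)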

    def find_one(self, *args, **kwargs):
        """Get a single result from the table.

        Works just like :py:meth:`find() <dataset.Table.find>` but returns one
        result, or ``None``.
        ::

            row = table.find_one(country='United States')
        """
        if not self.exists:
            return None

        kwargs['_limit'] = 1
        kwargs['_step'] = None
        resiter = self.find(*args, **kwargs)
        try:
            for row in resiter:
                return row
        finally:
            resiter.close()

    def count(self, *_clauses, **kwargs):
        """Return the count of results for the given filter set."""
        # NOTE: this does not have support for limit and offset since I can't
        # see how this is useful. Still, there might be compatibility issues
        # with people using these flags. Let's see how it goes.
        if not self.exists:
            return 0

        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = select([func.count()], whereclause=args)
        query = query.select_from(self.table)
        rp = self.db.executable.execute(query)
        return rp.fetchone()[0]

    def __len__(self):
        """Return the number of rows in the table."""
        return self.count()

    def distinct(self, *args, **_filter):
        """Return all the unique (distinct) values for the given ``columns``.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        if not self.exists:
            return []

        filters = []
        for column, value in _filter.items():
            if not self.has_column(column):
                raise DatasetException("No such column: %s" % column)
            filters.append(self.table.c[column] == value)

        columns = []
        for column in args:
            if isinstance(column, ClauseElement):
                filters.append(column)
            else:
                if not self.has_column(column):
                    raise DatasetException("No such column: %s" % column)
                columns.append(self.table.c[column])

        if not len(columns):
            return []

        q = expression.select(columns,
                              distinct=True,
                              whereclause=and_(*filters),
                              order_by=[c.asc() for c in columns])
        return self.db.query(q)

    # Legacy methods for running find queries.
    all = find

    def __iter__(self):
        """Return all rows of the table as simple dictionaries.

        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`find() <dataset.Table.find>`.
        ::

            for row in table:
                print(row)
        """
        return self.find()

    def __repr__(self):
        """Get table representation."""
        return '<Table(%s)>' % self.table.name
Beispiel #49
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    name = None
    connection = None
    columns = []

    create = True

    sa_table = None
    sa_metadata = None

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self):
        super(SQLTable, self).__init__()
        self.columns = []

    def _get_sa_type(self, column):
        # Map the declarative type names used in column specs to
        # SQLAlchemy column types.
        if column["type"] == "Integer":
            return Integer
        elif column["type"] == "String":
            if "length" not in column:
                column["length"] = 128
            return Unicode(length=column["length"])
        elif column["type"] == "Float":
            return Float
        elif column["type"] == "Boolean":
            return Boolean
        elif column["type"] == "AutoIncrement":
            return Integer
        elif column["type"] == "Date":
            return Date
        elif column["type"] == "Time":
            return Time
        elif column["type"] == "DateTime":
            return DateTime
        else:
            raise Exception("Invalid data type: %s" % column["type"])

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                            (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" %
                        (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))


        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            # Check for duplicate names
            if (column["name"] in columns_ex):
                raise Exception("Duplicate column name '%s' in %s" % (column["name"], self))
            columns_ex.append(column["name"])

            # Configure column
            column["pk"] = False if (not "pk" in column) else parsebool(column["pk"])
            if (not "type" in column): column["type"] = "String"
            #if (not "value" in column): column["value"] = None
            self.sa_table.append_column( Column(column["name"],
                                                self._get_sa_type(column),
                                                primary_key = column["pk"],
                                                autoincrement = (True if column["type"] == "AutoIncrement" else False) ))

        # Check schema

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # Extend?

        # Delete columns?

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none is defined.
        """

        if self._pk is False:
            pk_cols = []
            for col in self.columns:
                if ("pk" in col):
                    if parsebool(col["pk"]):
                        pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk
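
    # Sketch: the cached result is either the single column dict flagged
    # pk=True, e.g. {"name": "id", "type": "AutoIncrement", "pk": True}
    # (illustrative values), or None when no primary key is declared.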

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            d[column["name"]] = getattr(row, column["name"])

        return d

    def _find(self, ctx, attribs):

        self._selects += 1
        SQLTable._selects += 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)


    def lookup(self, ctx, attribs):

        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name)

        rows = self._find(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys = []):
        """
        Upsert checks whether the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk["name"]] = data[pk["name"]]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c["type"] != "AutoIncrement":
                        v1 = row[c['name']]
                        v2 = data[c['name']]
                        if c["type"] == "Date":
                            v1 = row[c['name']].strftime('%Y-%m-%d')
                            v2 = data[c['name']].strftime('%Y-%m-%d')
                        if (isinstance(v1, basestring) or isinstance(v2, basestring)):
                            if (not isinstance(v1, basestring)): v1 = str(v1)
                            if (not isinstance(v2, basestring)): v2 = str(v2)
                        if (v1 != v2):
                            if (c["name"] not in self._lookup_changed_fields):
                                logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c["name"], v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id
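
    # Usage sketch (hypothetical data): update-or-insert by a natural key
    # rather than the declared primary key:
    #
    #     table.upsert(ctx, {"code": "DE", "name": "Germany"}, keys=["code"])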

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if (column["type"] != "AutoIncrement"):
                try:
                    row[column["name"]] = data[column["name"]]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))

                # Checks
                if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]]))

        return row