def create_table(self, table_name, columns, ctypes, keys):
    """
    This method is used to create a new table in the database.
    Input: Database configuration object and table name
    Output: New table created
    """
    # before creating a new table, check whether it already exists
    self.table_name = table_name
    table_flag = self.check_if_table_exists(table_name)
    if table_flag:
        print('Table already exists in the database. '
              'No need to create a new table')
    else:
        # create a new table since it does not exist
        print('Table - %s does not exist. Create a new table' % table_name)
        # get the description of the table
        table_columns = self.get_table_desc(columns, ctypes, keys)
        try:
            new_table = Table(self.table_name, self.metadata, *table_columns)
            # create the new table
            new_table.create(checkfirst=True)
            print("Table '%s' created" % self.table_name)
        except Exception:
            print('Error while creating the table %s' % self.table_name)
            # re-raise the original error instead of a bare Exception,
            # so the traceback is preserved
            raise
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    rbac = Table('rbac', meta,
                 Column('id', UUID(), default=utils.generate_uuid,
                        primary_key=True),
                 Column('created_at', DateTime,
                        default=lambda: timeutils.utcnow()),
                 Column('updated_at', DateTime,
                        onupdate=lambda: timeutils.utcnow()),
                 Column('project_id', UUID(), nullable=False, unique=False),
                 Column('object_id', UUID(), nullable=False, unique=False),
                 Column('target_tenant', UUID(), nullable=False,
                        unique=False),
                 Column('rbac_action', String(255), nullable=False,
                        unique=False),
                 mysql_engine='InnoDB',
                 mysql_charset='utf8')

    rbac.create(checkfirst=True)
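# sqlalchemy-migrate scripts conventionally pair upgrade() with a
# downgrade(). The original migration above does not show one; this is a
# minimal hypothetical sketch of the counterpart, assuming the same
# module-level `meta` object used above.
def downgrade(migrate_engine):
    meta.bind = migrate_engine
    rbac = Table('rbac', meta, autoload=True)
    rbac.drop(checkfirst=True)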
def test_clone_table_adds_or_deletes_columns(self):
    meta = MetaData()
    meta.bind = self.engine

    table = Table('dummy', meta,
                  Column('id', String(36), primary_key=True,
                         nullable=False),
                  Column('A', Boolean, default=False))
    table.create()

    newcols = [
        Column('B', Boolean, default=False),
        Column('C', String(255), default='foobar')
    ]
    ignorecols = [table.c.A.name]
    new_table = migrate_utils.clone_table('new_dummy', table, meta,
                                          newcols=newcols,
                                          ignorecols=ignorecols)

    col_names = [c.name for c in new_table.columns]
    self.assertEqual(3, len(col_names))
    self.assertIsNotNone(new_table.c.B)
    self.assertIsNotNone(new_table.c.C)
    self.assertNotIn('A', col_names)
def upgrade(migrate_engine): """Create shares and share_access_map tables.""" meta = MetaData() meta.bind = migrate_engine shares = Table('shares', meta, autoload=True) share_snapshots = Table( 'share_snapshots', meta, Column('created_at', DateTime), Column('updated_at', DateTime), Column('deleted_at', DateTime), Column('deleted', Boolean), Column('id', String(length=36), primary_key=True, nullable=False), Column('user_id', String(length=255)), Column('project_id', String(length=255)), Column('share_id', String(36), ForeignKey('shares.id'), nullable=False), Column('size', Integer), Column('status', String(length=255)), Column('progress', String(length=255)), Column('display_name', String(length=255)), Column('display_description', String(length=255)), Column('share_size', Integer), Column('share_proto', String(length=255)), Column('export_location', String(255)), mysql_engine='InnoDB') try: share_snapshots.create() except Exception: LOG.error(_("Table %r not created!"), share_snapshots) raise
def create_table(self, table_name, primary_id='id', primary_type='Integer'):
    """
    Creates a new table. The new table will automatically have an `id`
    column unless specified via optional parameter primary_id, which will
    be used as the primary key of the table. Automatic id is set to be an
    auto-incrementing integer, while the type of custom primary_id can be a
    String or an Integer as specified with primary_type flag. The default
    length of String is 255. The caller can specify the length. The caller
    will be responsible for the uniqueness of manual primary_id.

    This custom id feature is only available via direct create_table call.

    Returns a :py:class:`Table <dataset.Table>` instance.
    ::

        table = db.create_table('population')

        # custom id and type
        table2 = db.create_table('population2', 'age')
        table3 = db.create_table('population3', primary_id='race',
                                 primary_type='String')
        # custom length of String
        table4 = db.create_table('population4', primary_id='race',
                                 primary_type='String(50)')
    """
    table_name = self._valid_table_name(table_name)
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))
        match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
        if match:
            if match.group(1) == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id, Integer,
                             primary_key=True, autoincrement=auto_flag)
            elif not match.group(3):
                col = Column(primary_id, String(255), primary_key=True)
            else:
                len_string = int(match.group(3)[1:-1])
                len_string = min(len_string, 255)
                col = Column(primary_id, String(len_string),
                             primary_key=True)
        else:
            raise DatasetException(
                "The primary_type has to be either 'Integer' or 'String'.")

        table = SQLATable(table_name, self.metadata, schema=self.schema)
        table.append_column(col)
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
    finally:
        self._release()
def add_aspect(self, name, *columns):
    table = Table(
        name,
        self.metadata,
        Column('id', types.Integer, ForeignKey("entity.id")),
        *columns)
    table.create()
    self.aspects[name] = [x.name for x in columns]
    self.aspect_names = set(self.aspects.iterkeys())
def wrap(fn):
    table_definition = TableDefinition()
    fn(table_definition)
    table = Table(name, self.meta)
    for attrname in table_definition.fields.keys():
        args, kw = table_definition.fields[attrname]
        table.append_column(Column(attrname, *args, **kw))
    table.create()
def _create_table(self, table_name):
    table_name = validate_name(table_name)
    log.debug("Creating table: %s on %r" % (table_name, self.engine))
    table = Table(table_name, self.meta)
    col = Column(ID_COLUMN, Integer, primary_key=True)
    table.append_column(col)
    table.create(self.engine)
    return table
def create_table(engine, table_name):
    log.debug("Creating table: %s on %r" % (table_name, engine))
    table = Table(table_name, engine._metadata)
    col = Column('id', Integer, primary_key=True)
    table.append_column(col)
    table.create(engine)
    TABLES[engine][table_name] = table
    return table
def __init__(self, registry=None):
    self.registry = registry
    self.engine = create_engine('sqlite://')
    self.metadata = MetaData(self.engine)
    entity = Table(
        'entity',
        self.metadata,
        Column('id', types.Integer, primary_key=True))
    entity.create()
    self.aspects = propdict()
def create_table(engine, table_name):
    with lock:
        log.debug("Creating table: %s on %r" % (table_name, engine))
        table = Table(table_name, engine._metadata)
        col = Column("id", Integer, primary_key=True)
        table.append_column(col)
        table.create(engine)
        engine._tables[table_name] = table
        return table
def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))

    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)
def create_table(self, table_name, primary_id='id', primary_type='Integer'):
    """
    Creates a new table. The new table will automatically have an `id`
    column unless specified via optional parameter primary_id, which will
    be used as the primary key of the table. Automatic id is set to be an
    auto-incrementing integer, while the type of custom primary_id can be a
    String or an Integer as specified with primary_type flag. The default
    length of String is 255. The caller can specify the length. The caller
    will be responsible for the uniqueness of manual primary_id.

    This custom id feature is only available via direct create_table call.

    Returns a :py:class:`Table <dataset.Table>` instance.
    ::

        table = db.create_table('population')

        # custom id and type
        table2 = db.create_table('population2', 'age')
        table3 = db.create_table('population3', primary_id='race',
                                 primary_type='String')
        # custom length of String
        table4 = db.create_table('population4', primary_id='race',
                                 primary_type='String(50)')
    """
    table_name = self._valid_table_name(table_name)
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))
        match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
        if match:
            if match.group(1) == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id, Integer,
                             primary_key=True, autoincrement=auto_flag)
            elif not match.group(3):
                col = Column(primary_id, String(255), primary_key=True)
            else:
                len_string = int(match.group(3)[1:-1])
                len_string = min(len_string, 255)
                col = Column(primary_id, String(len_string),
                             primary_key=True)
        else:
            raise DatasetException(
                "The primary_type has to be either 'Integer' or 'String'.")

        table = SQLATable(table_name, self.metadata, schema=self.schema)
        table.append_column(col)
        # always add `created` and `updated` timestamp columns to tables
        table.append_column(Column('created', DateTime,
                                   default=datetime.utcnow))
        table.append_column(Column('updated', DateTime,
                                   default=datetime.utcnow,
                                   onupdate=datetime.utcnow))
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
    finally:
        self._release()
def wrap(fn):
    table_definition = OrderedProperties()
    fn(table_definition)
    table = Table(table_name, g.db_meta)
    for attrname in table_definition.keys():
        value = table_definition[attrname]
        if isinstance(value, Column):
            table.append_column(value)
        elif isinstance(value, Constraint):
            table.append_constraint(value)
    table.create(g.db_engine)
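# A hedged usage sketch for the decorator above. It assumes wrap() is
# returned by an outer factory such as define_table(table_name); that
# factory name and the column set are illustrative only, not part of the
# original snippet. The decorated function receives the definition object
# and attaches Column/Constraint attributes to it; the table is created
# as a side effect and the function itself is not used afterwards.
@define_table('users')
def users(t):
    t.id = Column('id', Integer, primary_key=True)
    t.email = Column('email', String(255), nullable=False)
    t.email_unique = UniqueConstraint('email', name='uq_users_email')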
def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert().from_select(['a'], one_row.select()))

    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)
def add_aspect(self, name, *columns):
    table = Table(
        name,
        self.metadata,
        Column('id', types.Integer, ForeignKey("entity.id")),
        *columns)
    table.create()
    self.aspects[name] = propdict(
        ((x.name, x) for x in columns), _name=name, _table=table)
    self._aspect_names = set(self.aspects.keys())
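# A brief usage sketch of the aspect pattern shown above: each registered
# aspect becomes its own table keyed back to the shared entity table via
# the ForeignKey on 'id' that add_aspect() injects. The 'person' aspect
# and its columns are illustrative, not from the original code.
db.add_aspect(
    'person',
    Column('name', types.String),
    Column('age', types.Integer),
)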
def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))

    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)
def create_table(cls, name, columns_data, pkey):
    columns = [
        Column(col_name, col_type, primary_key=(col_name == pkey))
        for col_name, col_type in columns_data.items()
    ]
    table = Table(name, cls._metadata, *columns)
    table.create(bind=cls._ctx.engine)
    base = automap_base(metadata=cls._metadata)
    base.prepare()
    return getattr(base.classes, name)
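# A usage sketch for the automap-based helper above. 'MyRegistry' stands
# in for whatever class defines create_table(), and the column mapping is
# illustrative; the helper returns an automapped ORM class bound to the
# freshly created table.
from sqlalchemy import Integer, String

Person = MyRegistry.create_table(
    'person',
    {'id': Integer, 'name': String(80)},
    pkey='id',
)
# Person is now an ORM-mapped class; instances correspond to table rows.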
def test_migrate_data(self):
    meta = MetaData(bind=self.engine)

    # create TableA
    table_a = Table('TableA', meta,
                    Column('id', Integer, primary_key=True),
                    Column('first', String(8), nullable=False),
                    Column('second', Integer))
    table_a.create()

    # populate it with sample data
    values = [
        {'id': 1, 'first': 'a'},
        {'id': 2, 'first': 'b'},
        {'id': 3, 'first': 'c'}
    ]
    for value in values:
        self.engine.execute(table_a.insert(values=value))

    # create TableB, similar to TableA except for column 'second'
    table_b = Table('TableB', meta,
                    Column('id', Integer, primary_key=True),
                    Column('first', String(8), nullable=False))
    table_b.create()

    # migrate data
    migrate_utils.migrate_data(self.engine, table_a, table_b, ['second'])

    # validate table_a is dropped (the source table must no longer exist)
    self.assertFalse(self.engine.dialect.has_table(
        self.engine.connect(), 'TableA'),
        'Data migration failed to drop source table')

    # validate table_b is updated with data from table_a
    table_b_rows = list(table_b.select().execute())
    self.assertEqual(3, len(table_b_rows), "Data migration failed")
    table_b_values = []
    for row in table_b_rows:
        table_b_values.append({'id': row.id, 'first': row.first})
    self.assertEqual(values, table_b_values,
                     "Data migration failed with invalid data copy")
def load_raw(eng, handle, dummy_separator='\v', table_name=None):
    """
    Uses SQLAlchemy (and optionally psycopg2) to load raw data into a
    one-column table.

    Returns a SQLAlchemy Table object.

    Parameters
    ==========
    eng: required
        Must be a SQLAlchemy engine object.

    handle: required
        Must be a file-like object. I.e., returned by pyhcup.parser._open().

    dummy_separator: required (default: "\v")
        Must be a character not found in the data to be loaded. The
        psycopg2.cursor.copy_from() method uses a horizontal tab ('\t') by
        default, but I am afraid of occasional horizontal tabs in HCUP and
        PUDF data that we just haven't found yet.

    table_name: optional (default: None)
        Table name for the load. Will be generated automatically if not
        provided.
    """
    # get the filename sans extension for use in making a table name
    base_filename = os.path.split(handle.name)[-1].split('.')[0]

    # make a timestamp in YYYYMMDDhhmmss format;
    # will be used as part of the table name
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d%H%M%S')

    if table_name is None:
        table_name = '%s_%s_raw' % (base_filename, timestamp)

    # lower-case the name, else SQLAlchemy will fail to reflect later
    # (it seems to coerce case-sensitive)
    table_name = table_name.lower()

    # proceed to table creation
    table = Table(table_name, MetaData(), Column('line', Text()))
    table.create(bind=eng)

    if eng.driver == 'psycopg2':
        # use Postgres COPY FROM
        conn = eng.raw_connection()
        cursor = conn.cursor()  # acquire a cursor from the connection object

        # load the data using the psycopg2.cursor.copy_from() method
        cursor.copy_from(handle, '"%s"' % table_name, sep=dummy_separator)
        conn.commit()
        conn.close()
    else:
        # fall back to line-by-line insert
        data = [{'line': l.strip()} for l in handle]
        eng.execute(table.insert(), data)

    return table
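# A minimal usage sketch for load_raw() above, assuming a PostgreSQL
# engine so the psycopg2 COPY FROM path is taken. The DSN and file path
# are placeholders, not values from the original code.
from sqlalchemy import create_engine

eng = create_engine('postgresql://user:pass@localhost/hcup')
with open('NY_SID_2009_CORE.asc') as handle:
    raw_table = load_raw(eng, handle)
# table name is derived from the file name plus a timestamp, e.g.
# 'ny_sid_2009_core_20240101120000_raw'
print(raw_table.name)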
def build_database(self):
    self.connect(test=False)
    if not os.path.isfile(self.name):
        sess = self.get_session()
        meta = MetaData()
        bt = Table('LabTable', meta,
                   Column('id', Integer, primary_key=True),
                   Column('labnumber', Integer),
                   Column('aliquot', Integer),
                   Column('collection_path', String(200)),
                   Column('repository_path', String(200)),
                   Column('create_date', DateTime))
        bt.create(sess.bind)
def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute('SET mapred.job.tracker=local')
    # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
    connection.execute(table.insert().from_select(['a'], one_row.select()))

    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)
class TableHandler(object):
    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """

    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database. """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c)
                     for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
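# _upsert() above tries an UPDATE first and only INSERTs when no row
# matched the unique-column filter, returning the affected row's id either
# way. A hedged calling sketch; `handler`, `conn`, and the column names
# are illustrative and not part of the original class.
row_id = handler._upsert(
    conn,
    {'code': 'DE', 'label': 'Germany'},
    unique_columns=['code'],
)
# If a row with code='DE' existed it was updated in place; otherwise a new
# row was inserted. row_id identifies the row in both cases.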
def create_table(self, table_name, columns):
    """Create a table called table_name in the set database with the
    given schema (a list of SQLAlchemy Column objects).
    Note that table constraints are not added.
    """
    self.reflect()
    if self.table_exists(table_name):
        return
    kwargs = {}
    if self.protocol_manager.uses_schemas:
        kwargs = {'schema': self.database_name}
    new_table = Table(table_name, self.metadata, *columns, **kwargs)
    new_table.create(checkfirst=True)
    return new_table
def test_nullable_reflection(self):
    t = Table(
        "t",
        self.metadata,
        Column("a", Integer, nullable=True),
        Column("b", Integer, nullable=False),
    )
    t.create()
    # the dialect under test apparently reflects unquoted identifiers
    # as upper case, hence "A"/"B" below for columns created as "a"/"b"
    eq_(
        dict(
            (col["name"], col["nullable"])
            for col in inspect(self.metadata.bind).get_columns("t")
        ),
        {"A": True, "B": False},
    )
def create_table(self, table_name, primary_id='id', primary_type='Integer'):
    """
    Creates a new table. The new table will automatically have an `id`
    column unless specified via optional parameter primary_id, which will
    be used as the primary key of the table. Automatic id is set to be an
    auto-incrementing integer, while the type of custom primary_id can be
    a Text or an Integer as specified with primary_type flag. The caller
    will be responsible for the uniqueness of manual primary_id.

    This custom id feature is only available via direct create_table call.

    Returns a :py:class:`Table <dataset.Table>` instance.
    ::

        table = db.create_table('population')

        # custom id and type
        table2 = db.create_table('population2', 'age')
        table3 = db.create_table('population3', primary_id='race',
                                 primary_type='Text')
    """
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))
        table = SQLATable(table_name, self.metadata)
        # compare with == rather than `is`: identity checks between string
        # literals are an implementation detail and unreliable
        if primary_type == 'Integer':
            auto_flag = False
            if primary_id == 'id':
                auto_flag = True
            col = Column(primary_id, Integer,
                         primary_key=True, autoincrement=auto_flag)
        elif primary_type == 'Text':
            col = Column(primary_id, Text, primary_key=True)
        else:
            raise DatasetException(
                "The primary_type has to be either 'Integer' or 'Text'.")
        table.append_column(col)
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
    finally:
        self._release()
def dte_load(eng, handle, table_name):
    """
    Uses SQLAlchemy (and optionally psycopg2) to load DaysToEvent data from
    a csv file into a database table. Uses the default schema; no support
    for specifying a schema inside this function.

    Returns the row count of the newly created table.

    Parameters
    ==========
    eng: required
        Must be a SQLAlchemy engine object.

    handle: required
        Must be a file-like object. I.e., returned by open(path).

    table_name: required
        Table name for the load.
    """
    table = Table(table_name, MetaData(),
                  Column('key', BigInteger),
                  Column('visitlink', BigInteger),
                  Column('daystoevent', BigInteger))
    table.create(eng, checkfirst=True)

    if eng.driver == 'psycopg2':
        # use Postgres COPY FROM
        conn = eng.raw_connection()
        cursor = conn.cursor()  # acquire a cursor from the connection object
        cp_sql = "COPY %s FROM STDIN DELIMITER ',' CSV HEADER;" % (table_name)
        cursor.copy_expert(cp_sql, handle)
        conn.commit()
        conn.close()
    else:
        # fall back to generic bulk insert
        data = []
        for line in handle:
            l = [int(x.strip()) for x in line.strip().split(',')]
            data.append({
                'key': l[0],
                'visitlink': l[1],
                'daystoevent': l[2]
            })
        eng.execute(table.insert(), data)

    row_count = eng.execute(
        select([func.count()]).select_from(table)).fetchone()[0]
    return row_count
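# An illustrative call of dte_load() above; the DSN and CSV path are
# placeholders. The CSV is expected to have a header row followed by
# integer key, visitlink, daystoevent columns.
from sqlalchemy import create_engine

eng = create_engine('postgresql://user:pass@localhost/hcup')
with open('daystoevent.csv') as handle:
    n = dte_load(eng, handle, 'dte_2009')
print('loaded %d rows' % n)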
def test_lots_of_types(self, engine, connection):
    # take type list from sqlalchemy.types
    types = [
        'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
        'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
        'BOOLEAN', 'SMALLINT', 'DATE', 'TIME', 'String', 'Integer',
        'SmallInteger', 'Numeric', 'Float', 'DateTime', 'Date', 'Time',
        'LargeBinary', 'Boolean', 'Unicode', 'UnicodeText',
    ]
    cols = []
    for i, t in enumerate(types):
        cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
    table = Table('test_table', MetaData(bind=engine), *cols)
    table.drop(checkfirst=True)
    table.create()
    table.drop()
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    keys = Enum(name='key', *ZONE_ATTRIBUTE_KEYS)

    domain_attributes_table = Table(
        'domain_attributes', meta,
        Column('id', UUID(), default=utils.generate_uuid, primary_key=True),
        Column('version', Integer(), default=1, nullable=False),
        Column('created_at', DateTime, default=lambda: timeutils.utcnow()),
        Column('updated_at', DateTime, onupdate=lambda: timeutils.utcnow()),
        Column('key', keys),
        Column('value', String(255), nullable=False),
        Column('domain_id', UUID(), nullable=False),
        UniqueConstraint('key', 'value', 'domain_id',
                         name='unique_attributes'),
        ForeignKeyConstraint(['domain_id'], ['domains.id'],
                             ondelete='CASCADE'),
        mysql_engine='INNODB',
        mysql_charset='utf8'
    )

    domains_table = Table('domains', meta, autoload=True)

    types = Enum(name='types', metadata=meta, *ZONE_TYPES)
    types.create()

    # Add type and transferred_at to domains
    type_ = Column('type', types, default='PRIMARY',
                   server_default='PRIMARY')
    transferred_at = Column('transferred_at', DateTime, default=None)

    type_.create(domains_table, populate_default=True)
    transferred_at.create(domains_table, populate_default=True)

    domain_attributes_table.create()

    dialect = migrate_engine.url.get_dialect().name
    if dialect.startswith('sqlite'):
        constraint = UniqueConstraint(
            'name', 'deleted', name='unique_domain_name',
            table=domains_table)

        # Add missing unique index
        constraint.create()
def create_table(self, table_name):
    """
    Creates a new table. The new table will automatically have an `id`
    column, which is set to be an auto-incrementing integer as the primary
    key of the table.

    Returns a :py:class:`Table <dataset.Table>` instance.
    ::

        table = db.create_table('population')
    """
    with self.lock:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))
        table = SQLATable(table_name, self.metadata)
        col = Column('id', Integer, primary_key=True)
        table.append_column(col)
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
def linktab(self):
    if not hasattr(self, '_linktab'):
        if self.engine.has_table(self.linktab_name):
            self._linktab = Table(self.linktab_name, self.meta,
                                  autoload=True)
        else:
            table = Table(self.linktab_name, self.meta)
            col = Column('view', Unicode, index=True)
            table.append_column(col)
            col = Column('serial', Unicode(40))
            table.append_column(col)
            col = Column('key', Unicode, index=True)
            table.append_column(col)
            col = Column('fingerprint', Unicode(255), index=True)
            table.append_column(col)
            table.create(self.engine)
            self._linktab = table
    return self._linktab
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    status_enum = Enum(name='service_statuses', metadata=meta,
                       *SERVICE_STATES)
    status_enum.create()

    service_status_table = Table(
        'service_statuses', meta,
        Column('id', UUID(), default=utils.generate_uuid, primary_key=True),
        Column('created_at', DateTime),
        Column('updated_at', DateTime),
        Column('service_name', String(40), nullable=False),
        Column('hostname', String(255), nullable=False),
        Column('heartbeated_at', DateTime, nullable=True),
        Column('status', status_enum, nullable=False),
        Column('stats', Text, nullable=False),
        Column('capabilities', Text, nullable=False),
    )
    service_status_table.create()
def test_clone_table_retains_constraints(self):
    meta = MetaData()
    meta.bind = self.engine

    parent = Table('parent', meta,
                   Column('id', String(36), primary_key=True,
                          nullable=False),
                   Column('A', Integer),
                   Column('B', Integer),
                   Column('C', Integer,
                          CheckConstraint('C>100', name="above 100")),
                   Column('D', Integer, unique=True),
                   UniqueConstraint('A', 'B', name='uix_1'))
    parent.create()

    child = Table('child', meta,
                  Column('id', String(36),
                         ForeignKey('parent.id', name="parent_ref"),
                         primary_key=True, nullable=False),
                  Column('A', Boolean, default=False))
    child.create()

    ignorecols = [
        parent.c.D.name,
    ]
    new_parent = migrate_utils.clone_table('new_parent', parent, meta,
                                           ignorecols=ignorecols)
    new_child = migrate_utils.clone_table('new_child', child, meta)

    self.assertTrue(_has_constraint(new_parent.constraints,
                                    UniqueConstraint, 'uix_1'))
    self.assertTrue(_has_constraint(new_parent.c.C.constraints,
                                    CheckConstraint, 'above 100'))
    self.assertTrue(_has_constraint(new_child.constraints,
                                    ForeignKeyConstraint, 'parent_ref'))
def create_table(self, table_name):
    """
    Creates a new table. The new table will automatically have an `id`
    column, which is set to be an auto-incrementing integer as the primary
    key of the table.

    Returns a :py:class:`Table <dataset.Table>` instance.
    ::

        table = db.create_table('population')
    """
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))
        table = SQLATable(table_name, self.metadata)
        col = Column('id', Integer, primary_key=True)
        table.append_column(col)
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
    finally:
        self._release()
def test_clone_table_swaps_columns(self):
    meta = MetaData()
    meta.bind = self.engine

    table = Table('dummy1', meta,
                  Column('id', String(36), primary_key=True,
                         nullable=False),
                  Column('A', Boolean, default=False))
    table.create()

    swapcols = {
        'A': Column('A', Integer, default=1),
    }
    new_table = migrate_utils.clone_table('swap_dummy', table, meta,
                                          swapcols=swapcols)

    self.assertIsNotNone(new_table.c.A)
    self.assertEqual(Integer, type(new_table.c.A.type))
def test_clone_table_ignores_constraints(self):
    meta = MetaData()
    meta.bind = self.engine

    table = Table('constraints_check', meta,
                  Column('id', String(36), primary_key=True,
                         nullable=False),
                  Column('A', Integer),
                  Column('B', Integer),
                  Column('C', Integer,
                         CheckConstraint('C>100', name="above 100")),
                  UniqueConstraint('A', 'B', name='uix_1'))
    table.create()

    ignorecons = [
        'uix_1',
    ]
    new_table = migrate_utils.clone_table('constraints_check_tmp', table,
                                          meta, ignorecons=ignorecons)

    self.assertFalse(
        _has_constraint(new_table.constraints, UniqueConstraint, 'uix_1'))
def test_lots_of_types(self, engine, connection):
    # Presto doesn't have raw CREATE TABLE support, so we only test Hive
    # take type list from sqlalchemy.types
    types = [
        'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
        'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
        'BOOLEAN', 'SMALLINT', 'DATE', 'TIME', 'String', 'Integer',
        'SmallInteger', 'Numeric', 'Float', 'DateTime', 'Date', 'Time',
        'Binary', 'Boolean', 'Unicode', 'UnicodeText',
    ]
    cols = []
    for i, t in enumerate(types):
        cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
    cols.append(Column('hive_date', HiveDate))
    cols.append(Column('hive_decimal', HiveDecimal))
    cols.append(Column('hive_timestamp', HiveTimestamp))
    table = Table('test_table', MetaData(bind=engine), *cols,
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute('SET mapred.job.tracker=local')
    connection.execute('USE pyhive_test_database')
    big_number = 10 ** 10 - 1
    connection.execute("""
    INSERT OVERWRITE TABLE test_table
    SELECT
        1, "a", "a", "a", "a", "a", 0.1, 0.1, 0.1, 0, 0, "a", "a",
        false, 1, 0, 0, "a", 1, 1, 0.1, 0.1, 0, 0, 0, "a", false,
        "a", "a", 0, %d, 123 + 2000
    FROM default.one_row
    """, big_number)
    row = connection.execute(table.select()).fetchone()
    self.assertEqual(row.hive_date, datetime.date(1970, 1, 1))
    self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
    self.assertEqual(row.hive_timestamp,
                     datetime.datetime(1970, 1, 1, 0, 0, 2, 123))
    table.drop()
def create_table(self, schema_name, table_name, columns_configuration,
                 drop_first):
    metadata = MetaData()
    table = Table(table_name, metadata, schema=schema_name)

    for column_configuration in columns_configuration:
        table.append_column(
            self.create_column(column_configuration["destination"]))

    table.append_column(
        Column(
            Providers.AuditColumnsNames.TIMESTAMP,
            DateTime(timezone=True),
            server_default=func.now(),
        ))
    table.append_column(
        Column(
            Providers.AuditColumnsNames.IS_DELETED,
            Boolean,
            server_default="f",
            default=False,
        ))
    table.append_column(
        Column(Providers.AuditColumnsNames.CHANGE_VERSION, BigInteger))

    if drop_first:
        self.logger.debug(f"Dropping table {schema_name}.{table_name}")
        table.drop(self.target_db, checkfirst=True)
        self.logger.debug(f"Dropped table {schema_name}.{table_name}")

    self.logger.debug(f"Creating table {schema_name}.{table_name}")
    table.create(self.target_db, checkfirst=False)
    self.logger.debug(f"Created table {schema_name}.{table_name}")
    return
def postgresql_create_table(metadata, engine, columns, object_name):
    table = Table(object_name, metadata, *columns, extend_existing=True)
    table.create(engine, checkfirst=True)
    logging.info('created table in database: %s', object_name)
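# Despite the name, the helper above only uses generic SQLAlchemy calls,
# so a quick smoke test can run against any engine. A sketch with an
# in-memory SQLite engine; the column list and table name are illustrative.
from sqlalchemy import create_engine, MetaData, Column, Integer, Text

engine = create_engine('sqlite://')
metadata = MetaData()
cols = [Column('id', Integer, primary_key=True), Column('note', Text)]
postgresql_create_table(metadata, engine, cols, 'notes')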
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False
    columns = []
    create = True
    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):
        super(SQLTable, self).__init__()
        self.sa_table = None
        self.sa_metadata = None
        self.name = name
        self.connection = connection
        self.label = label if label else name
        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):
        if column.type == "Integer":
            return Integer
        elif column.type == "String":
            # if column.length is None: column.length = 128
            return Unicode(length=128)
        elif column.type == "Float":
            return Float
        elif column.type == "Boolean":
            return Boolean
        elif column.type == "AutoIncrement":
            return Integer
        elif column.type == "Date":
            return Date
        elif column.type == "Time":
            return Time
        elif column.type == "DateTime":
            return DateTime
        elif column.type == "Binary":
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s"
                            % (column, column.type))

    def finalize(self, ctx):
        if not SQLTable._finalized:
            SQLTable._finalized = True
            if SQLTable._inserts + SQLTable._selects > 0:
                logger.info("SQLTable Totals ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates,
                             SQLTable._selects))
        if self._inserts + self._selects > 0:
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                        (self.name, self._inserts, self._updates,
                         self._selects))
        if self._unicode_errors > 0:
            logger.warning("SQLTable %s found %d warnings assigning "
                           "non-unicode fields to unicode columns" %
                           (self.name, self._unicode_errors))
        ctx.comp.finalize(self.connection)
        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):
        super(SQLTable, self).initialize(ctx)
        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []
        ctx.comp.initialize(self.connection)
        logger.debug("Loading table %s on %s" % (self.name, self))
        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)
        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:
            logger.debug("Adding column to %s: %s" % (self, column))
            column.sqltable = self

            # Check for duplicate names
            if column.name in columns_ex:
                raise ETLConfigurationException(
                    "Duplicate column name '%s' in %s" % (column.name, self))
            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning("Column %s foreign key %s table (%s) has "
                                   "not been defined in backend (ignoring).",
                                   column, column.fk_sqlcolumn,
                                   column.fk_sqlcolumn.sqltable)
                    continue
                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           ForeignKey(
                               column.fk_sqlcolumn.sqltable.sa_table
                               .columns[column.fk_sqlcolumn.name]),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type ==
                                          "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type ==
                                          "AutoIncrement" else False)))

        # Check schema:
        # Create if it doesn't exist
        if not self.connection.engine().has_table(self.name):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend? (unsafe, allow read-only connections and make
        # them default?)
        # TODO:? Delete columns (unsafe, allow read-only connections and
        # make them default?)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """
        # if self._pk == False:
        if True:
            pk_cols = []
            for col in self.columns:
                if col.pk:
                    pk_cols.append(col)

            if len(pk_cols) > 1:
                raise Exception("Table %s has multiple primary keys: %s" %
                                (self.name, pk_cols))
            elif len(pk_cols) == 1:
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)
        return and_(*clauses)

    def _rowtodict(self, row):
        d = {}
        for column in self.columns:
            d[column.name] = getattr(row, column.name)
        return d

    def _find(self, ctx, attribs):
        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)
        for r in rows:
            # Ensure we return dicts, not RowProxys from SQLAlchemy
            yield self._rowtodict(r)

    def lookup(self, ctx, attribs, find_function=None):
        logger.debug("Lookup on '%s' attribs: %s" % (self, attribs))

        if len(attribs.keys()) == 0:
            raise Exception("Cannot lookup on table '%s' with no criteria "
                            "(empty attribute set)" % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if len(rows) > 1:
            raise Exception("Found more than one row when searching for "
                            "just one in table %s: %s" %
                            (self.name, attribs))
        elif len(rows) == 1:
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" %
                     (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys=[]):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """
        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if len(keys) > 0:
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError:
                    raise Exception("Could not find attribute '%s' in data "
                                    "when storing row data: %s" %
                                    (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:
            row = self.lookup(ctx, qfilter)
            if row:
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if isinstance(v1, str) or isinstance(v2, str):
                            if not isinstance(v1, str):
                                v1 = str(v1)
                            if not isinstance(v2, str):
                                v2 = str(v2)
                        if v1 != v2:
                            if c.name not in self._lookup_changed_fields:
                                logger.warn(
                                    "%s updating an entity that exists with "
                                    "different attributes, overwriting "
                                    "(field=%s, existing_value=%s, "
                                    "tried_value=%s)" %
                                    (self, c.name, v1, v2))
                                # self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id

    def _prepare_row(self, ctx, data):
        row = {}
        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in "
                                    "table '%s' while inserting row: %s" %
                                    (e, self.name, data))

                # Checks
                if (column.type == "String"
                        and not isinstance(row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if ctx.debug:
                        logger.warn("Unicode column %r received non-unicode "
                                    "string: %r " %
                                    (column.name, row[column.name]))

        return row

    def insert(self, ctx, data):
        row = self._prepare_row(ctx, data)
        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(
            self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1
        return row

    def update(self, ctx, data, keys=[]):
        row = self._prepare_row(ctx, data)

        # Automatically calculate lookup if necessary. Resolve the primary
        # key up front: the original resolved it only in the no-keys branch,
        # which made the final `pk is not None` check a NameError when keys
        # were supplied.
        pk = self.pk(ctx)
        qfilter = {}
        if len(keys) > 0:
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError:
                    raise Exception("Could not find attribute '%s' in data "
                                    "when storing row data: %s" %
                                    (key, data))
        else:
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(
            self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1
        return row if pk is not None else None
from sqlalchemy.types import Unicode

engine = create_engine('postgresql://python@localhost/')
loader = PostgresLoader(engine)
testmodule = loader.load_module('testmodule')

metadata = MetaData(bind=engine)
table = Table('testtable', metadata,
              Column('test', Unicode),
              Column('test2', Unicode))
table.drop(checkfirst=True)
table.create(checkfirst=True)

for i in range(20):
    table.insert({'test': 'test%d' % i, 'test2': 'test%d' % i}).execute()

print(engine.execute(
    testmodule.pyconcat(table.c.test, table.c.test2)).fetchall())

statement = """
CREATE TRIGGER mytrigger
BEFORE INSERT ON %s
FOR EACH ROW
EXECUTE PROCEDURE %s();
"""
engine.execute(statement % (table.name,
                            testmodule.nullifying_trigger.__name__))
class Table(object):
    def __init__(self, db, schema, table, columns=None):
        self.db = db
        self.schema = schema
        self.name = table
        self.engine = create_engine(db.url)
        self.metadata = MetaData(schema=schema)
        self.metadata.bind = self.engine
        # http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html
        # if provided columns (SQLAlchemy columns), create the table
        if table:
            if columns:
                self.table = SQLATable(
                    table, self.metadata, *columns, schema=self.schema
                )
                self.table.create()
            # otherwise just load from db
            else:
                self.table = SQLATable(
                    table, self.metadata, schema=self.schema, autoload=True
                )
            self.indexes = dict((i.name, i) for i in self.table.indexes)
            self._is_dropped = False
        else:
            self._is_dropped = True
            self.table = None

    @property
    def _normalized_columns(self):
        return list(map(normalize_column_name, self.columns))

    @property
    def columns(self):
        """Return list of all columns in table
        """
        return list(self.table.columns.keys())

    @property
    def sqla_columns(self):
        """Return all columns in table as sqlalchemy column types
        """
        return self.table.columns

    @property
    def column_types(self):
        """Return a dict mapping column name to type for all columns in table
        """
        column_types = {}
        for c in self.sqla_columns:
            column_types[c.name] = c.type
        return column_types

    @property
    def primary_key(self):
        """Return a list of columns making up the primary key constraint
        """
        return [c.name for c in self.table.primary_key]

    @property
    def op(self):
        ctx = MigrationContext.configure(self.engine.connect())
        return Operations(ctx)

    def _valid_table_name(self, table_name):
        """Check if the table name is obviously invalid.
        """
        if table_name is None or not len(table_name.strip()):
            raise ValueError("Invalid table name: %r" % table_name)
        return table_name.strip()

    def _update_table(self, table_name):
        self.metadata = MetaData(schema=self.schema)
        self.metadata.bind = self.engine
        return SQLATable(table_name, self.metadata, schema=self.schema)

    def add_primary_key(self, column="id"):
        """Add primary key constraint to specified column
        """
        if not self.primary_key:
            sql = """ALTER TABLE {s}.{t}
                     ADD PRIMARY KEY ({c})
                  """.format(s=self.schema, t=self.name, c=column)
            self.db.execute(sql)

    def drop(self):
        """Drop the table from the database
        """
        if self._is_dropped is False:
            self.table.drop(self.engine)
            self._is_dropped = True

    def _check_dropped(self):
        if self._is_dropped:
            raise DatasetException(
                "the table has been dropped. this object should not be "
                "used again."
            )

    def _args_to_clause(self, args):
        clauses = []
        for k, v in args.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.table.c[k].in_(v))
            else:
                clauses.append(self.table.c[k] == v)
        return and_(*clauses)

    def create_column(self, name, type):
        """
        Explicitly create a new column ``name`` of a specified type.
        ``type`` must be a `SQLAlchemy column type
        <http://docs.sqlalchemy.org/en/rel_0_8/core/types.html>`_.
        ::

            table.create_column('created_at', sqlalchemy.DateTime)
        """
        self._check_dropped()
        if normalize_column_name(name) not in self._normalized_columns:
            self.op.add_column(self.table.name, Column(name, type),
                               self.table.schema)
            self.table = self._update_table(self.table.name)

    def drop_column(self, name):
        """
        Drop the column ``name``
        ::

            table.drop_column('created_at')
        """
        self._check_dropped()
        if name in list(self.table.columns.keys()):
            self.op.drop_column(self.table.name, name, schema=self.schema)
            self.table = self._update_table(self.table.name)

    def create_index(self, columns, name=None, index_type="btree"):
        """
        Create an index to speed up queries on a table.
        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        self._check_dropped()
        if not name:
            sig = "||".join(columns + [index_type])

            # This is a work-around for a bug in <=0.6.1 which would create
            # indexes based on hash() rather than a proper hash.
            key = abs(hash(sig))
            name = "ix_%s_%s" % (self.table.name, key)
            if name in self.indexes:
                return self.indexes[name]

            key = sha1(sig.encode("utf-8")).hexdigest()[:16]
            name = "ix_%s_%s" % (self.table.name, key)
        if name in self.indexes:
            return self.indexes[name]
        # self.db._acquire()
        columns = [self.table.c[col] for col in columns]
        idx = Index(name, *columns, postgresql_using=index_type)
        idx.create(self.engine)
        # finally:
        #     self.db._release()
        self.indexes[name] = idx
        return idx

    def create_index_geom(self, column="geom"):
        """Shortcut to create index on geometry
        """
        self.create_index([column], index_type="gist")

    def distinct(self, *columns, **_filter):
        """
        Return all rows of a table, but remove rows with duplicate values
        in ``columns``. Internally this creates a `DISTINCT statement
        <http://www.w3schools.com/sql/sql_distinct.asp>`_.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        self._check_dropped()
        qargs = []
        try:
            columns = [self.table.c[c] for c in columns]
            for col, val in _filter.items():
                qargs.append(self.table.c[col] == val)
        except KeyError:
            return []

        q = expression.select(
            columns,
            distinct=True,
            whereclause=and_(*qargs),
            order_by=[c.asc() for c in columns],
        )
        # if just looking at one column, return a simple list
        if len(columns) == 1:
            return itertools.chain.from_iterable(self.engine.execute(q))
        # otherwise return specified row_type
        else:
            return ResultIter(self.engine.execute(q),
                              row_type=self.db.row_type)

    def insert(self, row):
        """
        Add a row (type: dict) by inserting it into the table. Columns must
        exist.
        ::

            data = dict(title='I am a banana!')
            table.insert(data)

        Returns the inserted row's primary key.
        """
        self._check_dropped()
        res = self.engine.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]

    def insert_many(self, rows, chunk_size=1000):
        """
        Add many rows at a time, which is significantly faster than adding
        them one by one. Per default the rows are processed in chunks of
        1000 per commit, unless you specify a different ``chunk_size``.

        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::

            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """

        def _process_chunk(chunk):
            self.table.insert().execute(chunk)

        self._check_dropped()
        chunk = []
        for i, row in enumerate(rows, start=1):
            chunk.append(row)
            if i % chunk_size == 0:
                _process_chunk(chunk)
                chunk = []
        if chunk:
            _process_chunk(chunk)

    def rename(self, name):
        """Rename the table
        """
        sql = """ALTER TABLE {s}.{t}
                 RENAME TO {name}
              """.format(s=self.schema, t=self.name, name=name)
        self.engine.execute(sql)
        self.table = SQLATable(name, self.metadata, schema=self.schema,
                               autoload=True)

    def find_one(self, **kwargs):
        """
        Works just like :py:meth:`find() <dataset.Table.find>` but returns
        one result, or None.
        ::

            row = table.find_one(country='United States')
        """
        kwargs["_limit"] = 1
        iterator = self.find(**kwargs)
        try:
            return next(iterator)
        except StopIteration:
            return None

    def _args_to_order_by(self, order_by):
        if order_by[0] == "-":
            return self.table.c[order_by[1:]].desc()
        else:
            return self.table.c[order_by].asc()

    def find(self, _limit=None, _offset=0, _step=5000, order_by="id",
             return_count=False, **_filter):
        """
        Perform a simple search on the table. Simply pass keyword arguments
        as ``filter``.
        ::

            results = table.find(country='France')
            results = table.find(country='France', year=1980)

        Using ``_limit``::

            # just return the first 10 rows
            results = table.find(country='France', _limit=10)

        You can sort the results by single or multiple columns. Append a
        minus sign to the column name for descending order::

            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns
            # (by year in descending order)
            results = table.find(order_by=['country', '-year'])

        By default :py:meth:`find() <dataset.Table.find>` will break the
        query into chunks of ``_step`` rows to prevent huge tables from
        being loaded into memory at once.

        For more complex queries, please use :py:meth:`db.query()` instead.
        """
        self._check_dropped()
        if not isinstance(order_by, (list, tuple)):
            order_by = [order_by]
        order_by = [
            o for o in order_by
            if (o.startswith("-") and o[1:] or o) in self.table.columns
        ]
        order_by = [self._args_to_order_by(o) for o in order_by]
        args = self._args_to_clause(_filter)

        # query total number of rows first
        count_query = alias(
            self.table.select(whereclause=args, limit=_limit,
                              offset=_offset),
            name="count_query_alias",
        ).count()
        rp = self.engine.execute(count_query)
        total_row_count = rp.fetchone()[0]
        if return_count:
            return total_row_count

        if _limit is None:
            _limit = total_row_count

        if _step is None or _step is False or _step == 0:
            _step = total_row_count

        if total_row_count > _step and not order_by:
            _step = total_row_count
            log.warn(
                "query cannot be broken into smaller sections because it "
                "is unordered"
            )

        queries = []
        for i in count():
            qoffset = _offset + (_step * i)
            qlimit = min(_limit - (_step * i), _step)
            if qlimit <= 0:
                break
            queries.append(
                self.table.select(whereclause=args, limit=qlimit,
                                  offset=qoffset, order_by=order_by)
            )
        return ResultIter(
            (self.engine.execute(q) for q in queries),
            row_type=self.db.row_type
        )

    def count(self, **_filter):
        """
        Return the count of results for the given filter set (same filter
        options as with ``find()``).
        """
        return self.find(return_count=True, **_filter)

    def __getitem__(self, item):
        """
        This is an alias for distinct which allows the table to be queried
        using square bracket syntax.
        ::

            # Same as distinct:
            print list(table['year'])
        """
        if not isinstance(item, tuple):
            item = (item,)
        return self.distinct(*item)

    def all(self):
        """
        Return all rows of the table as simple dictionaries. This is simply
        a shortcut to *find()* called with no arguments.
        ::

            rows = table.all()
        """
        return self.find()

    def __iter__(self):
        """
        Allow iterating over all rows in the table without explicitly
        calling :py:meth:`all() <dataset.Table.all>`.
        ::

            for row in table:
                print(row)
        """
        return self.all()

    def __repr__(self):
        return "<Table(%s)>" % self.table.name
class Table(object): """Represents a table in a database and exposes common operations.""" PRIMARY_DEFAULT = 'id' def __init__(self, database, table_name, primary_id=None, primary_type=None, auto_create=False): """Initialise the table from database schema.""" self.db = database self.name = normalize_table_name(table_name) self._table = None self._indexes = [] self._primary_id = primary_id self._primary_type = primary_type self._auto_create = auto_create @property def exists(self): """Check to see if the table currently exists in the database.""" if self._table is not None: return True return self.name in self.db @property def table(self): """Get a reference to the table, which may be reflected or created.""" if self._table is None: self._sync_table(()) return self._table @property def columns(self): """Get a listing of all columns that exist in the table.""" if not self.exists: return [] return self.table.columns.keys() def has_column(self, column): """Check if a column with the given name exists on this table.""" return normalize_column_name(column) in self.columns def insert(self, row, ensure=None, types=None): """Add a ``row`` dict by inserting it into the table. If ``ensure`` is set, any of the keys of the row are not table columns, they will be created automatically. During column creation, ``types`` will be checked for a key matching the name of a column to be created, and the given SQLAlchemy column type will be used. Otherwise, the type is guessed from the row value, defaulting to a simple unicode field. :: data = dict(title='I am a banana!') table.insert(data) Returns the inserted row's primary key. """ row = self._sync_columns(row, ensure, types=types) res = self.db.executable.execute(self.table.insert(row)) if len(res.inserted_primary_key) > 0: return res.inserted_primary_key[0] return True def insert_ignore(self, row, keys, ensure=None, types=None): """Add a ``row`` dict into the table if the row does not exist. If rows with matching ``keys`` exist they will be added to the table. Setting ``ensure`` results in automatically creating missing columns, i.e., keys of the row are not table columns. During column creation, ``types`` will be checked for a key matching the name of a column to be created, and the given SQLAlchemy column type will be used. Otherwise, the type is guessed from the row value, defaulting to a simple unicode field. :: data = dict(id=10, title='I am a banana!') table.insert_ignore(data, ['id']) """ row = self._sync_columns(row, ensure, types=types) if self._check_ensure(ensure): self.create_index(keys) args, _ = self._keys_to_args(row, keys) if self.count(**args) == 0: return self.insert(row, ensure=False) return False def insert_many(self, rows, chunk_size=1000, ensure=None, types=None): """Add many rows at a time. This is significantly faster than adding them one by one. Per default the rows are processed in chunks of 1000 per commit, unless you specify a different ``chunk_size``. See :py:meth:`insert() <dataset.Table.insert>` for details on the other parameters. :: rows = [dict(name='Dolly')] * 10000 table.insert_many(rows) """ chunk = [] for row in rows: row = self._sync_columns(row, ensure, types=types) chunk.append(row) if len(chunk) == chunk_size: self.table.insert().execute(chunk) chunk = [] if len(chunk): self.table.insert().execute(chunk) def update(self, row, keys, ensure=None, types=None, return_count=False): """Update a row in the table. 

        The update is managed via the set of column names stated in ``keys``:
        they will be used as filters for the data to be updated, using the
        values in ``row``.
        ::

            # update all entries with id matching 10, setting their title
            # columns
            data = dict(id=10, title='I am a banana!')
            table.update(data, ['id'])

        If keys in ``row`` update columns not present in the table, they will
        be created based on the settings of ``ensure`` and ``types``, matching
        the behavior of :py:meth:`insert() <dataset.Table.insert>`.
        """
        row = self._sync_columns(row, ensure, types=types)
        args, row = self._keys_to_args(row, keys)
        clause = self._args_to_clause(args)
        if not len(row):
            return self.count(clause)
        stmt = self.table.update(whereclause=clause, values=row)
        rp = self.db.executable.execute(stmt)
        if rp.supports_sane_rowcount():
            return rp.rowcount
        if return_count:
            return self.count(clause)

    def upsert(self, row, keys, ensure=None, types=None):
        """An UPSERT is a smart combination of insert and update.

        If rows with matching ``keys`` exist they will be updated, otherwise
        a new row is inserted in the table.
        ::

            data = dict(id=10, title='I am a banana!')
            table.upsert(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        row_count = self.update(row, keys, ensure=False, return_count=True)
        if row_count == 0:
            return self.insert(row, ensure=False)
        return True

    def delete(self, *clauses, **filters):
        """Delete rows from the table.

        Keyword arguments can be used to add column-based filters. The filter
        criterion will always be equality:
        ::

            table.delete(place='Berlin')

        If no arguments are given, all records are deleted.
        """
        if not self.exists:
            return False
        clause = self._args_to_clause(filters, clauses=clauses)
        stmt = self.table.delete(whereclause=clause)
        rp = self.db.executable.execute(stmt)
        return rp.rowcount > 0

    def _reflect_table(self):
        """Load the table's definition from the database."""
        with self.db.lock:
            try:
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema,
                                        autoload=True)
            except NoSuchTableError:
                pass

    def _threading_warn(self):
        if self.db.in_transaction and threading.active_count() > 1:
            warnings.warn("Changing the database schema inside a transaction "
                          "in a multi-threaded environment is likely to lead "
                          "to race conditions and synchronization issues.",
                          RuntimeWarning)

    def _sync_table(self, columns):
        """Lazy load, create or adapt the table structure in the database."""
        if self._table is None:
            # Load an existing table from the database.
            self._reflect_table()
        if self._table is None:
            # Create the table with an initial set of columns.
            if not self._auto_create:
                raise DatasetException("Table does not exist: %s" % self.name)
            # Keep the lock scope small because this is run very often.
            with self.db.lock:
                self._threading_warn()
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema)

                if self._primary_id is not False:
                    # This can go wrong on DBMSes like MySQL and SQLite,
                    # where a table cannot be created without any columns.
                    primary_id = self._primary_id or self.PRIMARY_DEFAULT
                    primary_type = self._primary_type or Types.integer
                    increment = primary_type in [Types.integer, Types.bigint]
                    column = Column(primary_id, primary_type,
                                    primary_key=True,
                                    autoincrement=increment)
                    self._table.append_column(column)

                for column in columns:
                    if not column.name == self._primary_id:
                        self._table.append_column(column)
                self._table.create(self.db.executable, checkfirst=True)
        elif len(columns):
            with self.db.lock:
                self._threading_warn()
                for column in columns:
                    if not self.has_column(column.name):
                        self.db.op.add_column(self.name, column,
                                              self.db.schema)
                self._reflect_table()

    def _sync_columns(self, row, ensure, types=None):
        """Create missing columns (or the table) prior to writes.

        If automatic schema generation is disabled (``ensure`` is ``False``),
        this will remove any keys from the ``row`` for which there is no
        matching column.
        """
        columns = self.columns
        ensure = self._check_ensure(ensure)
        types = types or {}
        types = {normalize_column_name(k): v for (k, v) in types.items()}
        out = {}
        sync_columns = []
        for name, value in row.items():
            name = normalize_column_name(name)
            if ensure and name not in columns:
                _type = types.get(name)
                if _type is None:
                    _type = self.db.types.guess(value)
                sync_columns.append(Column(name, _type))
                columns.append(name)
            if name in columns:
                out[name] = value
        self._sync_table(sync_columns)
        return out

    def _check_ensure(self, ensure):
        if ensure is None:
            return self.db.ensure_schema
        return ensure

    def _args_to_clause(self, args, clauses=()):
        clauses = list(clauses)
        for column, value in args.items():
            if not self.has_column(column):
                clauses.append(false())
            elif isinstance(value, (list, tuple)):
                clauses.append(self.table.c[column].in_(value))
            else:
                clauses.append(self.table.c[column] == value)
        return and_(*clauses)

    def _args_to_order_by(self, order_by):
        orderings = []
        for ordering in ensure_tuple(order_by):
            if ordering is None:
                continue
            column = ordering.lstrip('-')
            if column not in self.table.columns:
                continue
            if ordering.startswith('-'):
                orderings.append(self.table.c[column].desc())
            else:
                orderings.append(self.table.c[column].asc())
        return orderings

    def _keys_to_args(self, row, keys):
        keys = ensure_tuple(keys)
        keys = [normalize_column_name(k) for k in keys]
        row = row.copy()
        args = {k: row.pop(k) for k in keys if k in row}
        return args, row

    def create_column(self, name, type):
        """Create a new column ``name`` of a specified type.
        ::

            table.create_column('created_at', db.types.datetime)
        """
        name = normalize_column_name(name)
        if self.has_column(name):
            log.debug("Column exists: %s" % name)
            return
        self._sync_table((Column(name, type),))

    def create_column_by_example(self, name, value):
        """Explicitly create a new column ``name`` with a type that is
        appropriate to store the given example ``value``. The type is guessed
        in the same way as for the insert method with ``ensure=True``.
        ::

            table.create_column_by_example('length', 4.2)

        If a column of the same name already exists, no action is taken, even
        if it is not of the type we would have created.
        """
        type_ = self.db.types.guess(value)
        self.create_column(name, type_)

    def drop_column(self, name):
        """Drop the column ``name``.
        ::

            table.drop_column('created_at')
        """
        if self.db.engine.dialect.name == 'sqlite':
            raise RuntimeError("SQLite does not support dropping columns.")
        name = normalize_column_name(name)
        with self.db.lock:
            if not self.exists or not self.has_column(name):
                log.debug("Column does not exist: %s", name)
                return

            self._threading_warn()
            self.db.op.drop_column(self.table.name, name, self.table.schema)
            self._reflect_table()

    def drop(self):
        """Drop the table from the database.

        Deletes both the schema and all the contents within it.
        """
        with self.db.lock:
            if self.exists:
                self._threading_warn()
                self.table.drop(self.db.executable, checkfirst=True)
                self._table = None

    def has_index(self, columns):
        """Check if an index exists to cover the given ``columns``."""
        if not self.exists:
            return False
        columns = set([normalize_column_name(c) for c in columns])
        if columns in self._indexes:
            return True
        for column in columns:
            if not self.has_column(column):
                return False
        indexes = self.db.inspect.get_indexes(self.name,
                                              schema=self.db.schema)
        for index in indexes:
            if columns == set(index.get('column_names', [])):
                self._indexes.append(columns)
                return True
        return False

    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given, a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [normalize_column_name(c) for c in ensure_tuple(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            for column in columns:
                if not self.has_column(column):
                    return

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]
                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)

    def find(self, *_clauses, **kwargs):
        """Perform a simple search on the table.

        Simply pass keyword arguments as ``filter``.
        ::

            results = table.find(country='France')
            results = table.find(country='France', year=1980)

        Using ``_limit``::

            # just return the first 10 rows
            results = table.find(country='France', _limit=10)

        You can sort the results by single or multiple columns. Append a minus
        sign to the column name for descending order::

            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (descending by year)
            results = table.find(order_by=['country', '-year'])

        To perform complex queries with advanced filters or to perform
        aggregation, use :py:meth:`db.query() <dataset.Database.query>`
        instead.
        """
        if not self.exists:
            return []

        _limit = kwargs.pop('_limit', None)
        _offset = kwargs.pop('_offset', 0)
        order_by = kwargs.pop('order_by', None)
        _streamed = kwargs.pop('_streamed', False)
        _step = kwargs.pop('_step', QUERY_STEP)
        if _step is False or _step == 0:
            _step = None

        order_by = self._args_to_order_by(order_by)
        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = self.table.select(whereclause=args,
                                  limit=_limit,
                                  offset=_offset)
        if len(order_by):
            query = query.order_by(*order_by)

        conn = self.db.executable
        if _streamed:
            conn = self.db.engine.connect()
            conn = conn.execution_options(stream_results=True)

        return ResultIter(conn.execute(query),
                          row_type=self.db.row_type,
                          step=_step)

    def find_one(self, *args, **kwargs):
        """Get a single result from the table.

        Works just like :py:meth:`find() <dataset.Table.find>` but returns one
        result, or ``None``.
        ::

            row = table.find_one(country='United States')
        """
        if not self.exists:
            return None
        kwargs['_limit'] = 1
        kwargs['_step'] = None
        resiter = self.find(*args, **kwargs)
        try:
            for row in resiter:
                return row
        finally:
            resiter.close()

    def count(self, *_clauses, **kwargs):
        """Return the count of results for the given filter set."""
        # NOTE: this does not have support for limit and offset since I can't
        # see how this is useful. Still, there might be compatibility issues
        # with people using these flags. Let's see how it goes.
        if not self.exists:
            return 0

        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = select([func.count()], whereclause=args)
        query = query.select_from(self.table)
        rp = self.db.executable.execute(query)
        return rp.fetchone()[0]

    def __len__(self):
        """Return the number of rows in the table."""
        return self.count()

    def distinct(self, *args, **_filter):
        """Return all the unique (distinct) values for the given ``columns``.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        if not self.exists:
            return []

        filters = []
        for column, value in _filter.items():
            if not self.has_column(column):
                raise DatasetException("No such column: %s" % column)
            filters.append(self.table.c[column] == value)

        columns = []
        for column in args:
            if isinstance(column, ClauseElement):
                filters.append(column)
            else:
                if not self.has_column(column):
                    raise DatasetException("No such column: %s" % column)
                columns.append(self.table.c[column])

        if not len(columns):
            return []

        q = expression.select(columns,
                              distinct=True,
                              whereclause=and_(*filters),
                              order_by=[c.asc() for c in columns])
        return self.db.query(q)

    # Legacy method for running find queries.
    all = find

    def __iter__(self):
        """Return all rows of the table as simple dictionaries.

        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`find() <dataset.Table.find>`.
        ::

            for row in table:
                print(row)
        """
        return self.find()

    def __repr__(self):
        """Get table representation."""
        return '<Table(%s)>' % self.table.name
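
Assuming this class is wired into a ``dataset``-style ``Database`` (as its docstrings suggest), typical usage could look like the following sketch; the ``dataset.connect()`` entry point and the in-memory SQLite URL are assumptions for illustration, not part of the code above.

# Illustrative usage, assuming this Table is exposed through a
# dataset-style Database obtained via dataset.connect().
import dataset

db = dataset.connect('sqlite:///:memory:')
table = db['people']

# insert() creates missing columns on the fly (ensure defaults to True).
pk = table.insert(dict(name='Dolly', age=33))

# upsert() updates the matching row, or inserts if none matches.
table.upsert(dict(name='Dolly', age=34), ['name'])

# find()/find_one() filter by keyword equality; '-' prefixes sort descending.
row = table.find_one(name='Dolly')
assert row['age'] == 34

for row in table.find(order_by='-age', _limit=10):
    print(row)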

class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []
    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):
        super(SQLTable, self).__init__()
        self.sa_table = None
        self.sa_metadata = None
        self.name = name
        self.connection = connection
        self.label = label if label else name
        self.columns = columns or []
        for col in self.columns:
            col.sqltable = self

    def _get_sa_type(self, column):
        if column.type == "Integer":
            return Integer
        elif column.type == "String":
            # TODO: honour column.length; a fixed length of 128 is used for now.
            return Unicode(length=128)
        elif column.type == "Float":
            return Float
        elif column.type == "Boolean":
            return Boolean
        elif column.type == "AutoIncrement":
            return Integer
        elif column.type == "Date":
            return Date
        elif column.type == "Time":
            return Time
        elif column.type == "DateTime":
            return DateTime
        elif column.type == "Binary":
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" %
                            (column, column.type))

    def finalize(self, ctx):
        if not SQLTable._finalized:
            SQLTable._finalized = True
            if SQLTable._inserts + SQLTable._selects > 0:
                logger.info("SQLTable Totals ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates,
                             SQLTable._selects))

        if self._inserts + self._selects > 0:
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                        (self.name, self._inserts, self._updates,
                         self._selects))
        if self._unicode_errors > 0:
            logger.warning("SQLTable %s found %d warnings assigning "
                           "non-unicode fields to unicode columns" %
                           (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)
        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):
        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?
        columns_ex = []
        for column in self.columns:
            logger.debug("Adding column to %s: %s" % (self, column))
            column.sqltable = self

            # Check for duplicate names
            if column.name in columns_ex:
                raise ETLConfigurationException(
                    "Duplicate column name '%s' in %s" % (column.name, self))
            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning("Column %s foreign key %s table (%s) has "
                                   "not been defined in backend (ignoring).",
                                   column, column.fk_sqlcolumn,
                                   column.fk_sqlcolumn.sqltable)
                    continue
                self.sa_table.append_column(Column(
                    column.name,
                    self._get_sa_type(column),
                    ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[
                        column.fk_sqlcolumn.name]),
                    primary_key=column.pk,
                    nullable=column.nullable,
                    autoincrement=(column.type == "AutoIncrement")))
            else:
                self.sa_table.append_column(Column(
                    column.name,
                    self._get_sa_type(column),
                    primary_key=column.pk,
                    nullable=column.nullable,
                    autoincrement=(column.type == "AutoIncrement")))

        # Check schema: create the table if it does not exist.
        if not self.connection.engine().has_table(self.name):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend? (unsafe, allow read-only connections and make
        # them default?)
        # TODO:?
        # Delete columns (unsafe, allow read-only connections and make
        # them default?)

    def pk(self, ctx):
        """Return the primary key column definition, or None if none is
        defined."""
        pk_cols = [col for col in self.columns if col.pk]
        if len(pk_cols) > 1:
            raise Exception("Table %s has multiple primary keys: %s" %
                            (self.name, pk_cols))
        elif len(pk_cols) == 1:
            self._pk = pk_cols[0]
        else:
            self._pk = None
        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)
        return and_(*clauses)

    def _rowtodict(self, row):
        d = {}
        for column in self.columns:
            d[column.name] = getattr(row, column.name)
        return d

    def _find(self, ctx, attribs):
        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)
        for r in rows:
            # Ensure we return dicts, not RowProxys from SQLAlchemy
            yield self._rowtodict(r)

    def lookup(self, ctx, attribs, find_function=None):
        logger.debug("Lookup on '%s' attribs: %s" % (self, attribs))

        if len(attribs.keys()) == 0:
            raise Exception("Cannot lookup on table '%s' with no criteria "
                            "(empty attribute set)" % self.name)

        find_function = find_function or self._find
        rows = list(find_function(ctx, attribs))
        if len(rows) > 1:
            raise Exception("Found more than one row when searching for just "
                            "one in table %s: %s" % (self.name, attribs))
        elif len(rows) == 1:
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" %
                     (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys=[]):
        """Upsert checks whether the row exists and has changed. It does a
        lookup followed by an update or insert as appropriate.
""" # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] # Do lookup if len(qfilter) > 0: row = self.lookup(ctx, qfilter) if (row): # Check row is identical for c in self.columns: if c.type != "AutoIncrement": v1 = row[c.name] v2 = data[c.name] if c.type == "Date": v1 = row[c.name].strftime('%Y-%m-%d') v2 = data[c.name].strftime('%Y-%m-%d') if (isinstance(v1, str) or isinstance(v2, str)): if (not isinstance(v1, str)): v1 = str(v1) if (not isinstance(v2, str)): v2 = str(v2) if (v1 != v2): if (c.name not in self._lookup_changed_fields): logger.warning( "%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2)) #self._lookup_changed_fields.append(c["name"]) # Update the row row = self.update(ctx, data, keys) return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if column.type != "AutoIncrement": try: row[column.name] = data[column.name] except KeyError as e: raise Exception( "Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if (column.type == "String") and (not isinstance( row[column.name], str)): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warning( "Unicode column %r received non-unicode string: %r " % (column.name, row[column.name])) return row def insert(self, ctx, data): row = self._prepare_row(ctx, data) logger.debug("Inserting in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute(self.sa_table.insert(row)) pk = self.pk(ctx) if pk: row[pk.name] = res.inserted_primary_key[0] self._inserts = self._inserts + 1 SQLTable._inserts = SQLTable._inserts + 1 if pk is not None: return row else: return row # None def update(self, ctx, data, keys=[]): row = self._prepare_row(ctx, data) # Automatically calculate lookup if necessary qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] logger.debug("Updating in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute( self.sa_table.update(self._attribsToClause(qfilter), row)) self._updates = self._updates + 1 SQLTable._updates = SQLTable._updates + 1 if pk is not None: return row else: return None