Exemple #1
0
def clear_database(
    engine: Union[Engine, Connection], schemas: Iterable[str] = ()) -> None:
    """
    Remove every table from an existing database.

    For SQLite, the database file is deleted from disk when the URL points at an existing file;
    in-memory databases are left untouched. For every other dialect, the default schema and each
    schema in ``schemas`` are reflected (views included) and everything reflected is dropped.

    :param engine: the engine or connection to use
    :param schemas: full list of schema names to expect (ignored for SQLite)

    """
    assert check_argument_types()

    if engine.dialect.name == 'sqlite':
        # SQLite does not support dropping constraints and deleting the file is faster anyway
        db_path = engine.url.database
        if db_path not in (None, ':memory:') and os.path.isfile(db_path):
            os.remove(db_path)
        return

    # Reflect all schemas first (tables, views and constraints), then drop in a second pass
    reflected = []
    for schema_name in (None, *schemas):
        schema_metadata = MetaData()
        schema_metadata.reflect(engine, schema=schema_name, views=True)
        reflected.append(schema_metadata)

    for schema_metadata in reflected:
        schema_metadata.drop_all(engine, checkfirst=False)
    def reflect_model(self, table_name, bind_key=None):
        """Reflect a database table into an automapped ORM model class.

        The result is cached in ``self._models`` by table name, and the
        reflected table is also stored in ``self._tables``. The whole
        operation runs while holding ``self._reflect_lock``.

        :param table_name: name of the table to reflect
        :param bind_key: passed to ``self.get_engine`` to pick the engine
        :return: the automapped ORM class for ``table_name``
        """
        with self._reflect_lock:
            if table_name in self._models:
                return self._models[table_name]

            reflected_meta = MetaData(bind=self.get_engine(bind_key))
            reflected_meta.reflect(only=[table_name])

            self._tables[table_name] = reflected_meta.tables[table_name]

            automap = automap_base(metadata=reflected_meta)
            automap.prepare()

            orm_class = getattr(automap.classes, table_name)
            # NOTE(review): presumably detaches the table from the temporary
            # MetaData created above -- confirm why this is needed.
            orm_class.__table__.metadata = None
            self._models[table_name] = orm_class
            return orm_class
    def reflect_model(self, table_name, bind_key=None):
        """Reflect a database table into an automapped ORM model class.

        The result is cached in ``self._models`` by table name, and the
        reflected table is also stored in ``self._tables``. The whole
        operation runs while holding ``self._reflect_lock``.

        :param table_name: name of the table to reflect
        :param bind_key: passed to ``self.get_engine`` to pick the engine
        :return: the automapped ORM class for ``table_name``
        """
        with self._reflect_lock:
            if table_name in self._models:
                return self._models[table_name]

            reflected_meta = MetaData(bind=self.get_engine(bind_key))
            reflected_meta.reflect(only=[table_name])

            self._tables[table_name] = reflected_meta.tables[table_name]

            automap = automap_base(metadata=reflected_meta)
            automap.prepare()

            orm_class = getattr(automap.classes, table_name)
            # NOTE(review): presumably detaches the table from the temporary
            # MetaData created above -- confirm why this is needed.
            orm_class.__table__.metadata = None
            self._models[table_name] = orm_class
            return orm_class
def test_clear_database(engine):
    """After ``clear_database`` no tables remain in the default schema or in ``altschema``."""
    clear_database(engine, ['altschema'])

    default_meta = MetaData()
    default_meta.reflect(engine)
    assert len(default_meta.tables) == 0

    # The alternate schema is only checked on non-SQLite dialects
    if engine.dialect.name == 'sqlite':
        return

    alt_meta = MetaData(schema='altschema')
    alt_meta.reflect(engine)
    assert len(alt_meta.tables) == 0
Exemple #5
0
    def test_get_table_names(self, engine, conn):
        """Both metadata reflection and the inspector list the expected tables."""
        reflected = MetaData()
        reflected.reflect(bind=engine)
        print(reflected.tables)
        for expected_name in ("one_row", "one_row_complex"):
            self.assertIn(expected_name, reflected.tables)

        schema_tables = sqlalchemy.inspect(engine).get_table_names(schema=SCHEMA)
        self.assertIn("many_rows", schema_tables)
def test_clear_database(connection):
    """After ``clear_database`` no tables remain; ``altschema`` is only used off SQLite."""
    is_sqlite = connection.dialect.name == "sqlite"
    clear_database(connection, [] if is_sqlite else ["altschema"])

    default_meta = MetaData()
    default_meta.reflect(connection)
    assert len(default_meta.tables) == 0

    if not is_sqlite:
        alt_meta = MetaData(schema="altschema")
        alt_meta.reflect(connection)
        assert len(alt_meta.tables) == 0
    def reflect_table(self, table_name, bind_key=None):
        """Reflect a single database table and cache it in ``self._tables``.

        Runs while holding ``self._reflect_lock``; a table that is already
        cached is returned without touching the database.

        :param table_name: name of the table to reflect
        :param bind_key: passed to ``self.get_engine`` to pick the engine
        :return: the reflected table object
        """
        with self._reflect_lock:
            if table_name in self._tables:
                return self._tables[table_name]

            reflected_meta = MetaData(bind=self.get_engine(bind_key))
            reflected_meta.reflect(only=[table_name])

            reflected = reflected_meta.tables[table_name]
            # NOTE(review): presumably detaches the table from the temporary
            # MetaData created above -- confirm why this is needed.
            reflected.metadata = None
            self._tables[table_name] = reflected
            return reflected
    def reflect_table(self, table_name, bind_key=None):
        """Reflect a single database table and cache it in ``self._tables``.

        Runs while holding ``self._reflect_lock``; a table that is already
        cached is returned without touching the database.

        :param table_name: name of the table to reflect
        :param bind_key: passed to ``self.get_engine`` to pick the engine
        :return: the reflected table object
        """
        with self._reflect_lock:
            if table_name in self._tables:
                return self._tables[table_name]

            reflected_meta = MetaData(bind=self.get_engine(bind_key))
            reflected_meta.reflect(only=[table_name])

            reflected = reflected_meta.tables[table_name]
            # NOTE(review): presumably detaches the table from the temporary
            # MetaData created above -- confirm why this is needed.
            reflected.metadata = None
            self._tables[table_name] = reflected
            return reflected
def clear_database(
    engine: Engine | Connection, schemas: Iterable[str] = ()) -> None:
    """
    Drop everything reflected from the default schema and the given schemas.

    Uses a synchronous connection/engine. All schemas are reflected (views included) before
    anything is dropped.

    :param engine: the engine or connection to use
    :param schemas: full list of schema names to expect (ignored for SQLite)

    """
    # ``None`` stands for the default schema and is always processed first
    schema_names: tuple[str | None, ...] = (None, *schemas)

    reflected = []
    for schema_name in schema_names:
        # Reflect the schema to get the list of the tables, views and constraints
        schema_metadata = MetaData()
        schema_metadata.reflect(engine, schema=schema_name, views=True)
        reflected.append(schema_metadata)

    for schema_metadata in reflected:
        schema_metadata.drop_all(engine, checkfirst=False)
    def run(self):
        """Copy every table and its rows from the source database to the destination.

        Steps, in order: reflect the source schema, point the reflected tables
        at a MetaData bound to the destination engine, drop the same-named
        tables on the destination, recreate them, then insert every source row
        and commit once at the end. Rows that raise ``StatementError`` are
        reported and skipped.
        """
        # Reflect the source schema
        source_meta = MetaData()
        source_meta.reflect(bind=self.source_engine)
        source_tables = source_meta.tables
        source_table_names = list(source_tables)

        # Re-point the reflected tables at the destination
        destination_meta = MetaData(bind=self.destination_engine)
        for name, table in source_tables.items():
            table.metadata = destination_meta
            if name in self.settings.exclude_data:
                table.__mapper_args__ = {"exclude_properties": self.settings.exclude_data[name]}

        # Drop the existing destination tables one by one (testing aid)
        for table_name in source_table_names:
            self.sessions.destination.execute("DROP TABLE {table};".format(table=table_name))
            self.sessions.destination.commit()
            print("DROPPED TABLE {table}".format(table=table_name))

        # Begin migration: recreate the schema on the destination
        source_meta.create_all(self.destination_engine)

        # Load all source rows up front, table by table
        source_data = {}
        for table_name in source_table_names:
            source_data[table_name] = self.sessions.source.query(source_tables[table_name]).all()

        for table_name in source_table_names:
            print("Migrating:", table_name)
            insert_into = source_tables[table_name]
            for row in source_data[table_name]:
                try:
                    self.sessions.destination.execute(insert_into.insert(row))
                except StatementError:
                    print("Bad data in table: ", table_name, "row data:\n", row[0], "Error:", sys.exc_info()[0])
            print("Data for:", table_name, "added to the queue..")

        self.sessions.destination.commit()
        print("Migration Complete!")
Exemple #11
0
    def run(self):
        """Copy every table and its rows from the source database to the destination.

        Steps, in order: reflect the source schema, point the reflected tables
        at a MetaData bound to the destination engine, drop the same-named
        tables on the destination, recreate them, then insert every source row
        and commit once at the end. Rows that raise ``StatementError`` are
        reported and skipped.
        """
        # Reflect the source schema
        source_meta = MetaData()
        source_meta.reflect(bind=self.source_engine)
        source_tables = source_meta.tables
        source_table_names = list(source_tables)

        # Re-point the reflected tables at the destination
        destination_meta = MetaData(bind=self.destination_engine)
        for name, table in source_tables.items():
            table.metadata = destination_meta
            if name in self.settings.exclude_data:
                table.__mapper_args__ = {'exclude_properties': self.settings.exclude_data[name]}

        # Drop the existing destination tables one by one (testing aid)
        for table_name in source_table_names:
            self.sessions.destination.execute('DROP TABLE {table};'.format(table=table_name))
            self.sessions.destination.commit()
            print('DROPPED TABLE {table}'.format(table=table_name))

        # Begin migration: recreate the schema on the destination
        source_meta.create_all(self.destination_engine)

        # Load all source rows up front, table by table
        source_data = {}
        for table_name in source_table_names:
            source_data[table_name] = self.sessions.source.query(source_tables[table_name]).all()

        for table_name in source_table_names:
            print("Migrating:", table_name)
            insert_into = source_tables[table_name]
            for row in source_data[table_name]:
                try:
                    self.sessions.destination.execute(insert_into.insert(row))
                except StatementError:
                    print('Bad data in table: ', table_name, 'row data:\n', row[0], 'Error:', sys.exc_info()[0])
            print('Data for:', table_name, 'added to the queue..')

        self.sessions.destination.commit()
        print('Migration Complete!')
# The optional 7th command-line argument enables table clearing when it is
# "TRUE" (case-insensitive); anything else, or no argument, disables it.
limpar_tabela = len(argv) > 6 and argv[6].upper() == 'TRUE'


# Engine for the "pontual" database
url = "postgresql://%s:%s@%s/pontual" % (usuario, senha, ip_nome)
__engine = create_engine(url)

# Automap the "pontual" schema
__m = MetaData(schema='pontual')
__Base = automap_base(bind=__engine, metadata=__m)
__Base.prepare(__engine, reflect=True)

# Automap the "seguranca" schema, reflecting its "user" table up front
__m2 = MetaData(schema='seguranca')
__m2.reflect(__engine, only=['user'])
__Base2 = automap_base(bind=__engine, metadata=__m2)
__Base2.prepare(__engine, reflect=True)


# SQLAlchemy ORM classes for the pontual database
User = __Base2.classes.user
Linha = __Base.classes.linha
AreaDeFiscalizacao = __Base.classes.area_de_fiscalizacao
PontoDeParada = __Base.classes.ponto_de_parada


# Session factory bound to the pontual engine
Sessao = sessionmaker(bind=__engine)
class EnronDB:
    """SQLAlchemy-backed access to the Enron email prediction tables.

    An instance is tied to one main table (``table_name``). ``init`` must be
    called to create the engine and reflect the schema before any method that
    touches the database is used.
    """

    # Columns read back from the prediction table, in select order.
    _PREDICTION_FIELDS = (
        'id', 'date', 'mime_type', 'from_addr', 'to_addr', 'subject',
        'raw_body', 'cleaned_body', 'one_line', 'path', 'prediction',
        'probability',
    )
    # Sample tables carry one extra, manually assigned label.
    _SAMPLE_FIELDS = _PREDICTION_FIELDS + ('manual_label',)
    # Columns read from the ``brushed_email`` table.
    # NOTE(review): ``to_adddr`` (three d's) is kept exactly as the original
    # code spelled it. It looks like a typo but may match the real column
    # name -- confirm against the database schema before changing it.
    _BRUSHED_FIELDS = (
        'id', 'date', 'mime_type', 'from_addr', 'to_adddr', 'subject',
        'body', 'one_line', 'path', 'label', 'is_scheduling',
    )

    def __init__(self, table_name):
        """Remember the main table name; no connection is made yet."""
        self.engine = None
        self.metadata = MetaData()
        self.table_name = table_name

    @classmethod
    def holbox_db(cls):
        """Return an instance for ``email_prediction`` connected to the holbox server.

        NOTE(review): the host and credentials are hard-coded here; consider
        moving them to configuration.
        """
        db = cls("email_prediction")
        db.init('holbox.lti.cs.cmu.edu', 'inmind', 'yahoo', 'enron_experiment')
        return db

    def init(self, host, username, password, db_name):
        """Create the MySQL engine and reflect the existing schema.

        :return: True on success, False if creating the engine or reflecting
            the schema raised an exception (the exception type is printed)
        """
        engine_desc = 'mysql://%s:%s@%s/%s' % (username, password, host, db_name)
        try:
            self.engine = create_engine(engine_desc)
            self.metadata.reflect(self.engine)
        except Exception:
            # Best-effort connect: report and signal failure. Unlike a bare
            # ``except:``, KeyboardInterrupt and SystemExit still propagate.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            return False
        return True

    @staticmethod
    def _prediction_columns():
        """Return fresh Column objects for the prediction table layout."""
        return [
            Column('id', Integer, primary_key=True),
            Column('date', Text),
            Column('mime_type', Text),
            Column('from_addr', Text),
            Column('to_addr', Text),
            Column('subject', Text),
            Column('raw_body', Text),
            Column('cleaned_body', Text),
            Column('one_line', Text),
            Column('path', Text),
            Column('prediction', Integer),
            Column('probability', Float),
        ]

    # sql:
    # create table TABLE_NAME (id INT NOT NULL AUTO_INCREMENT, date DATETIME, mime_type TEXT, from_addr TEXT,
    # to_addr TEXT, subject TEXT, raw_body TEXT, cleaned_body TEXT, one_line TEXT, path TEXT, prediction INT, PRIMARY KEY(id));
    def create_table(self):
        """Create the main prediction table (``self.table_name``)."""
        email_table = Table(self.table_name, self.metadata,
                            *self._prediction_columns())
        email_table.create(self.engine)

    def create_sample_table(self, sample_table_name):
        """Create a sample table: the prediction layout plus ``manual_label``.

        Prints a message and does nothing when ``sample_table_name`` equals
        the main table name.
        """
        if sample_table_name == self.table_name:
            print('Cannot use the same table name')
            return
        columns = self._prediction_columns()
        columns.append(Column('manual_label', Integer))
        email_table = Table(sample_table_name, self.metadata, *columns)
        email_table.create(self.engine)

    @staticmethod
    def _emails_from_rows(rows, columns, attr_overrides=None):
        """Build one Email per row, copying each listed column onto it.

        :param rows: iterable of result rows exposing columns as attributes
        :param columns: column names to copy
        :param attr_overrides: optional ``{column: email_attribute}`` mapping
            for columns stored under a different attribute name
        """
        attr_overrides = attr_overrides or {}
        emails = []
        for record in rows:
            email = Email()
            for column in columns:
                setattr(email, attr_overrides.get(column, column),
                        getattr(record, column))
            emails.append(email)
        return emails

    def _select_emails(self, table_name, columns, attr_overrides=None):
        """Select ``columns`` from ``table_name`` and return the rows as Emails."""
        email_table = Table(table_name, self.metadata)
        sel_stmt = select([getattr(email_table.c, name) for name in columns])
        rp = self.engine.execute(sel_stmt)
        return self._emails_from_rows(rp, columns, attr_overrides)

    def _insert_prediction_row(self, table_name, email):
        """Insert ``email`` into ``table_name`` and close the connection afterwards.

        NOTE(review): ``label`` is passed although ``create_table`` /
        ``create_sample_table`` define no such column; this relies on the
        reflected table having one -- confirm against the database schema.
        """
        email_table = Table(table_name, self.metadata)
        ins_stmt = email_table.insert()
        conn = self.engine.connect()
        try:
            conn.execute(ins_stmt, date=email.date,
                         mime_type=email.mime_type,
                         from_addr=email.from_addr,
                         to_addr=email.to_addr,
                         subject=email.subject,
                         raw_body=email.raw_body,
                         cleaned_body=email.cleaned_body,
                         one_line=email.one_line,
                         path=email.path,
                         label=email.label,
                         prediction=email.prediction,
                         probability=email.probability)
        finally:
            # The original never released the connection back to the pool.
            conn.close()

    def get_all_brushed_emails(self):
        """Return every row of ``brushed_email`` as an Email object."""
        emails = self._select_emails('brushed_email', self._BRUSHED_FIELDS,
                                     {'to_adddr': 'to_addr'})
        for email in emails:
            # A missing/NULL is_scheduling flag is reported as 0.
            email.is_scheduling = email.is_scheduling or 0
        return emails

    def insert_email(self, email):
        """Insert ``email`` into the main table; non-Email input is rejected."""
        if not isinstance(email, Email):
            print('ERROR: input must be of type Email')
            return

        self._insert_prediction_row(self.table_name, email)

    def get_all_email_predictions(self):
        """Return every row of the main table as an Email object."""
        return self._select_emails(self.table_name, self._PREDICTION_FIELDS)

    def get_sample_emails(self, sample_table_name):
        """Return every row of ``sample_table_name``, including ``manual_label``."""
        return self._select_emails(sample_table_name, self._SAMPLE_FIELDS)

    def get_all_email_predictions_greater_than(self, threshold=0.7):
        """Return the main-table rows whose probability is >= ``threshold``."""
        # The table name cannot be a bound parameter, so it is still
        # concatenated; the threshold value is bound instead of spliced in.
        s = text("select * from " + self.table_name
                 + " where probability >= :threshold")
        rp = self.engine.execute(s, threshold=threshold).fetchall()
        return self._emails_from_rows(rp, self._PREDICTION_FIELDS)

    def insert_sample_email(self, sample_table_name, email):
        """Insert ``email`` into a sample table.

        Non-Email input and a ``sample_table_name`` equal to the main table
        name are rejected with a printed message.
        """
        if not isinstance(email, Email):
            print('ERROR: input must be of type Email')
            return

        if sample_table_name == self.table_name:
            print('Cannot use the same table name')
            return

        self._insert_prediction_row(sample_table_name, email)
Exemple #14
0
# Connection settings, read from the loaded configuration
db_username, db_password, db_address, db_port, db_name = (
    config[key]
    for key in ('Username', 'Password', 'EndpointAddress', 'EndpointPort', 'Name')
)

# READ http://docs.sqlalchemy.org/en/latest/core/pooling.html
engine = create_engine(
    ''.join((
        'mysql://', db_username, ':', db_password, '@',
        db_address, ':', db_port, '/', db_name,
        '?charset=utf8&use_unicode=0',
    )),
    pool_recycle=3600,
)

# Reflect the schema so the tables can be looked up by name
metadata = MetaData()
metadata.reflect(engine)

# Table lookups (each is None when the table was not reflected):
# session, task, iteration and preferences
session_table, task_table, iter_table, pref_table = (
    metadata.tables.get(name)
    for name in ('session', 'task', 'iteration', 'preferences')
)

############
#  GLOBAL  #
############
def upgrade():
    """Create the ``corp_types`` lookup table and seed it with its rows.

    The table is created through Alembic, then reflected from the migration
    connection so that ``op.bulk_insert`` can be given a table object.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('corp_types',
    sa.Column('corp_type_cd', sa.String(length=5), nullable=False),
    sa.Column('colin_ind', sa.String(length=1), nullable=False),
    sa.Column('corp_class', sa.String(length=10), nullable=False),
    sa.Column('short_desc', sa.String(length=25), nullable=False),
    sa.Column('full_desc', sa.String(length=100), nullable=False),
    sa.Column('legislation', sa.String(length=100), nullable=True),
    sa.PrimaryKeyConstraint('corp_type_cd')
    )

    
    # Reflect only the table that was just created, using the migration's own connection
    meta = MetaData(bind=op.get_bind())
    meta.reflect(only=('corp_types',))

    # Table object for corp_types, looked up through the reflected metadata
    corp_types_table = Table('corp_types', meta)

    # Seed data: one dict per corp type, keyed by column name
    op.bulk_insert(
        corp_types_table,
        [
            {'corp_type_cd': 'A', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'EXTRA PRO', 'full_desc': 'Extraprovincial Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'B', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'EXTRA PRO', 'full_desc': 'Extraprovincial Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'BC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'BC COMPANY', 'full_desc': 'BC Limited Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'C', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CONTINUE IN', 'full_desc': 'BC Limited Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'CEM', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'CEMETARY', 'full_desc': 'Cemetary', 'legislation': ''},
            {'corp_type_cd': 'CP', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'COOP', 'full_desc': 'BC Cooperative Association', 'legislation': 'BC Cooperative Association Act'},
            {'corp_type_cd': 'EPR', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'EXTRA PRO REG', 'full_desc': 'Extraprovincial Registration', 'legislation': ''},
            {'corp_type_cd': 'FOR', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'FOREIGN', 'full_desc': 'Foreign Registration', 'legislation': ''},
            {'corp_type_cd': 'LIC', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'LICENSED', 'full_desc': 'Licensed (Extra-Pro)', 'legislation': ''},
            {'corp_type_cd': 'LIB', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'LIBRARY', 'full_desc': 'Public Library Association', 'legislation': ''},
            {'corp_type_cd': 'LLC', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'LIMITED CO', 'full_desc': 'Limited Liability Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'PA', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'PRIVATE ACT', 'full_desc': 'Private Act', 'legislation': 'Private Act'},
            {'corp_type_cd': 'PAR', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'PARISHES', 'full_desc': 'Parishes', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'PFS', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'PENS FUND SOC', 'full_desc': 'Pension Funded Society', 'legislation': ''},
            {'corp_type_cd': 'QA', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CO 1860', 'full_desc': 'CO 1860', 'legislation': ''},
            {'corp_type_cd': 'QB', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CO 1862', 'full_desc': 'CO 1862', 'legislation': ''},
            {'corp_type_cd': 'QC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CO 1878', 'full_desc': 'CO 1878', 'legislation': ''},
            {'corp_type_cd': 'QD', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CO 1890', 'full_desc': 'CO 1890', 'legislation': ''},
            {'corp_type_cd': 'QE', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CO 1897', 'full_desc': 'CO 1897', 'legislation': ''},
            {'corp_type_cd': 'REG', 'colin_ind': 'Y', 'corp_class': 'XPRO', 'short_desc': 'REGISTRATION', 'full_desc': 'Registraton (Extra-pro)', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'RLY', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'RAILWAYS', 'full_desc': 'Railways', 'legislation': ''},
            {'corp_type_cd': 'SB', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'SOCIETY BRANCH', 'full_desc': 'Society Branch', 'legislation': ''},
            {'corp_type_cd': 'T', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'TRUST', 'full_desc': 'Trust', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'TMY', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'TRAMWAYS', 'full_desc': 'Tramways', 'legislation': ''},
            {'corp_type_cd': 'XCP', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'XPRO COOP', 'full_desc': 'Extraprovincial Cooperative Association', 'legislation': 'BC Cooperative Association Act'},
            {'corp_type_cd': 'ULC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'BC ULC COMPANY', 'full_desc': 'BC Unlimited Liability Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'CUL', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CONTINUE IN', 'full_desc': 'Continuation In as a BC ULC', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'UQA', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CO 1860', 'full_desc': 'ULC CO 1860', 'legislation': ''},
            {'corp_type_cd': 'UQB', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CO 1862', 'full_desc': 'ULC CO 1862', 'legislation': ''},
            {'corp_type_cd': 'UQC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CO 1878', 'full_desc': 'ULC CO 1878', 'legislation': ''},
            {'corp_type_cd': 'UQD', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CO 1890', 'full_desc': 'ULC CO 1890', 'legislation': ''},
            {'corp_type_cd': 'UQE', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'ULC CO 1897', 'full_desc': 'ULC CO 1897', 'legislation': ''},
            {'corp_type_cd': 'CC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'BC CCC', 'full_desc': 'BC Community Contribution Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'CCC', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'CCC CONTINUE IN', 'full_desc': 'BC Community Contribution Company', 'legislation': 'BC Business Corporations Act'},
            {'corp_type_cd': 'S', 'colin_ind': 'Y', 'corp_class': 'SOC', 'short_desc': 'SOCIETY', 'full_desc': 'Society', 'legislation': 'BC Societies Act'},
            {'corp_type_cd': 'XS', 'colin_ind': 'Y', 'corp_class': 'SOC', 'short_desc': 'XPRO SOCIETY', 'full_desc': 'Extraprovincial Society', 'legislation': 'BC Societies Act'},
            {'corp_type_cd': 'SP', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'SOLE PROP', 'full_desc': 'Sole Proprietorship', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'GP', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'PARTNERSHIP', 'full_desc': 'General Partnership', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'LP', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'LIM PARTNERSHIP', 'full_desc': 'Limited Partnership', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'XP', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'XPRO LIM PARTNR', 'full_desc': 'Extraprovincial Limited Partnership', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'LL', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'LL PARTNERSHIP', 'full_desc': 'Limited Liability Partnership', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'XL', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'XPRO LL PARTNR', 'full_desc': 'Extrapro Limited Liability Partnership', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'MF', 'colin_ind': 'Y', 'corp_class': 'FIRM', 'short_desc': 'MISC FIRM', 'full_desc': 'Miscellaneous Firm', 'legislation': 'BC Partnership Act'},
            {'corp_type_cd': 'FI', 'colin_ind': 'N', 'corp_class': 'OT', 'short_desc': 'FINANCIAL', 'full_desc': 'Financial Institutions', 'legislation': 'Credit Union Incorporation Act'},
            {'corp_type_cd': 'CS', 'colin_ind': 'Y', 'corp_class': 'SOC', 'short_desc': 'CONT IN SOCIETY', 'full_desc': 'Society', 'legislation': 'BC Societies Act'},
            {'corp_type_cd': 'BEN', 'colin_ind': 'Y', 'corp_class': 'BC', 'short_desc': 'BENEFIT COMPANY', 'full_desc': 'BC Benefit Company', 'legislation': 'BC Business Corporations Act'}
        ]
    )
Exemple #16
0
# grabs the schema from the db
# run this any time the schema changes
# side note: metadata.pickle is kept in git

if __name__ == '__main__':
    import pickle

    from sqlalchemy.engine import create_engine
    from sqlalchemy.sql.schema import MetaData

    from mimic_package.connect.connect import connection_string
    from mimic_package.data_model.resources import metadata_filename

    # Reflect the whole "mimiciii" schema through a bound MetaData
    engine = create_engine(connection_string, echo=False, convert_unicode=True)
    metadata = MetaData(bind=engine)
    metadata.reflect(schema='mimiciii')

    # Persist the reflected schema so it can be loaded without a database
    with open(metadata_filename, 'wb') as pickle_file:
        pickle.dump(metadata, pickle_file)
class EnronDB:
    """Data-access helper for the Enron e-mail database.

    Wraps a SQLAlchemy engine plus reflected metadata and exposes one method
    per query/update.  ``init`` must be called (and must return ``True``)
    before any other method is used, because every other method needs
    ``self.engine``.

    Tables referenced: ``raw_email``, ``cleaned_email``, ``brushed_email``,
    ``brushed_email_more``, ``email_full`` and ``email_address``.
    """

    # Columns copied one-to-one onto an ``Email`` object for raw_email rows.
    _RAW_COLUMNS = ('date', 'mime_type', 'from_addr', 'to_addr',
                    'subject', 'body', 'path', 'label')

    # NOTE(review): ``to_adddr`` (three d's) is the spelling this class has
    # always used for the brushed_email table; it is kept as-is because the
    # real column name cannot be verified from here -- confirm against the DB.
    _BRUSHED_COLUMNS = ('date', 'mime_type', 'from_addr', 'to_adddr',
                        'subject', 'body', 'path', 'label', 'is_scheduling')
    _BRUSHED_LIST_COLUMNS = ('id', 'date', 'mime_type', 'from_addr',
                             'to_adddr', 'subject', 'body', 'one_line',
                             'path', 'label', 'is_scheduling')
    _FULL_LIST_COLUMNS = ('id', 'date', 'mime_type', 'from_addr', 'to_addr',
                          'subject', 'body', 'one_line', 'path', 'label',
                          'is_scheduling')
    # Maps a selected column name to the ``Email`` attribute it populates.
    _BRUSHED_RENAMES = {'to_adddr': 'to_addr'}

    def __init__(self):
        self.engine = None
        self.metadata = MetaData()

    def init(self, host, username, password, db_name):
        """Create the MySQL engine and reflect the schema.

        :return: ``True`` on success, ``False`` if the engine could not be
                 created or the schema could not be reflected (the error type
                 is printed).
        """
        engine_desc = 'mysql://%s:%s@%s/%s' % (username, password, host, db_name)
        try:
            self.engine = create_engine(engine_desc)
            self.metadata.reflect(self.engine)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            return False
        return True

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _table(self, table_name):
        """Return the ``Table`` registered under ``table_name`` in the metadata."""
        return Table(table_name, self.metadata)

    def _execute_write(self, stmt, **params):
        """Execute ``stmt`` on a short-lived connection that is always closed."""
        with self.engine.connect() as conn:
            conn.execute(stmt, **params)

    def _select(self, table_name, column_names, where=None):
        """Run ``SELECT column_names FROM table_name`` and return the result.

        :param where: optional callable taking the table and returning the
                      WHERE clause expression.
        """
        table = self._table(table_name)
        stmt = select([getattr(table.c, name) for name in column_names])
        if where is not None:
            stmt = stmt.where(where(table))
        return self.engine.execute(stmt)

    def _update_by_id(self, table_name, row_id, **values):
        """Set ``values`` on the row of ``table_name`` whose ``id`` is ``row_id``."""
        table = self._table(table_name)
        stmt = table.update().where(table.c.id == row_id).values(**values)
        self._execute_write(stmt)

    def _to_email(self, record, column_names, renames=None):
        """Build an ``Email`` whose attributes mirror the given record columns."""
        renames = renames or {}
        email = Email()
        for column in column_names:
            setattr(email, renames.get(column, column), getattr(record, column))
        return email

    def _emails_with_scheduling(self, table_name, column_names, renames=None):
        """Return every row as an ``Email``; a NULL/zero ``is_scheduling`` becomes 0."""
        emails = []
        for record in self._select(table_name, column_names):
            email = self._to_email(record, column_names, renames)
            email.is_scheduling = record.is_scheduling or 0
            emails.append(email)
        return emails

    # ------------------------------------------------------------------
    # RAW_EMAIL table
    # ------------------------------------------------------------------
    def insert_email(self, email):
        """Insert ``email`` into ``raw_email``."""
        self.insert_to_table(email, "raw_email")

    # CLEANED_EMAIL table
    def insert_cleaned_email(self, email):
        """Insert ``email`` into ``cleaned_email``."""
        self.insert_to_table(email, "cleaned_email")

    def insert_to_table(self, email, table_name):
        """Insert the basic fields of ``email`` into ``table_name``.

        Prints an error and returns without inserting when ``email`` is not
        an ``Email`` instance.
        """
        if not isinstance(email, Email):
            print('ERROR: input must be of type Email')
            return

        self._execute_write(self._table(table_name).insert(),
                            date=email.date,
                            mime_type=email.mime_type,
                            from_addr=email.from_addr,
                            to_addr=email.to_addr,
                            subject=email.subject,
                            body=email.body,
                            path=email.path,
                            label=email.label)

    def get_all_content(self):
        """Return every subject and body of ``raw_email`` joined by spaces."""
        rows = self._select('raw_email', ('subject', 'body'))
        # join() avoids the quadratic cost of repeated string concatenation.
        return "".join(record.subject + " " + record.body + " " for record in rows)

    def add_username(self):
        """Fill ``brushed_email.username`` from each row's ``path``.

        The username is the second ``/``-delimited component of the path.

        :raises SystemExit: with status 1 (after printing the offending path)
                            when a path does not match the expected layout.
        """
        # match the content between the second / and the third /
        pattern = re.compile(r"\/[^\/]*\/([^\/]+)")
        email_table = self._table('brushed_email')
        rows = self.engine.execute(select([email_table.c.id, email_table.c.path]))
        with self.engine.connect() as conn:
            for record in rows:
                match = pattern.match(record.path)
                if not match:
                    print("Error! " + record.path)
                    # Non-zero status: this is a failure, not a clean exit.
                    sys.exit(1)
                stmt = (email_table.update()
                        .where(email_table.c.id == record.id)
                        .values(username=match.group(1)))
                conn.execute(stmt)

    def update_brushed_email_is_scheduling(self, email_id, is_scheduling):
        """Set ``is_scheduling`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, is_scheduling=is_scheduling)

    def get_all_dates(self):
        """Return every ``raw_email`` date formatted as ``%y%m%d``."""
        return [record.date.strftime("%y%m%d")
                for record in self._select('raw_email', ('date',))]

    def get_all_subjects(self):
        """Return every ``raw_email`` subject."""
        return [record.subject for record in self._select('raw_email', ('subject',))]

    def get_all_bodies_with_id(self):
        """Return ``(id, body)`` for every ``raw_email`` row."""
        return [(record.id, record.body)
                for record in self._select('raw_email', ('id', 'body'))]

    def get_body(self, email_id):
        """Return the body of the ``raw_email`` row with the given id."""
        record = self._select('raw_email', ('body',),
                              lambda t: t.c.id == email_id).first()
        return record.body

    def get_all_bodies(self):
        """Return every ``raw_email`` body."""
        return [record.body for record in self._select('raw_email', ('body',))]

    def get_all_brushed_emails(self):
        """Return every ``brushed_email`` row as an ``Email``."""
        return self._emails_with_scheduling('brushed_email',
                                            self._BRUSHED_LIST_COLUMNS,
                                            self._BRUSHED_RENAMES)

    def get_brushed_email(self, email_id):
        """Return one ``brushed_email`` row as an ``Email``.

        A blank ``Email`` is returned when no row has that id.
        """
        record = self._select('brushed_email', self._BRUSHED_COLUMNS,
                              lambda t: t.c.id == email_id).first()
        if record is None:
            return Email()
        return self._to_email(record, self._BRUSHED_COLUMNS, self._BRUSHED_RENAMES)

    def get_email(self, email_id):
        """Return one ``raw_email`` row as an ``Email``.

        A blank ``Email`` is returned when no row has that id.
        """
        record = self._select('raw_email', self._RAW_COLUMNS,
                              lambda t: t.c.id == email_id).first()
        if record is None:
            return Email()
        return self._to_email(record, self._RAW_COLUMNS)

    def get_emails_from(self, from_addr):
        """Return all ``raw_email`` rows sent from ``from_addr`` as ``Email`` objects."""
        # Filters on ``from_addr`` -- the previous ``from_addrr`` spelling named
        # a column that the SELECT list itself shows does not exist.
        rows = self._select('raw_email', self._RAW_COLUMNS,
                            lambda t: t.c.from_addr == from_addr)
        return [self._to_email(record, self._RAW_COLUMNS) for record in rows]

    def get_emails_before(self, query_date):
        """Return all ``raw_email`` rows dated on or before ``query_date``."""
        rows = self._select('raw_email', self._RAW_COLUMNS,
                            lambda t: t.c.date <= query_date)
        return [self._to_email(record, self._RAW_COLUMNS) for record in rows]

    # ------------------------------------------------------------------
    # EMAIL_ADDRESS table
    # ------------------------------------------------------------------
    def insert_address(self, email_address):
        """Insert ``email_address`` into ``email_address``.

        Prints an error and returns without inserting when the argument is
        not an ``EmailAddress`` instance.
        """
        # The check must look at the parameter; the old code inspected an
        # unrelated name ``email``.
        if not isinstance(email_address, EmailAddress):
            print('ERROR: input must be of type EmailAddress')
            return

        self._execute_write(self._table('email_address').insert(),
                            address=email_address.address,
                            name=email_address.name)

    def get_address(self, address_id):
        """Return the ``EmailAddress`` (name and address) stored under ``address_id``.

        A blank ``EmailAddress`` is returned when no row has that id.
        """
        record = self._select('email_address', ('name', 'address'),
                              lambda t: t.c.id == address_id).first()
        email_address = EmailAddress()
        if record is not None:
            email_address.name = record.name
            email_address.address = record.address
        return email_address

    def get_address_name(self, address_id):
        """Return an ``EmailAddress`` with only ``name`` populated.

        A blank ``EmailAddress`` is returned when no row has that id.
        """
        record = self._select('email_address', ('name',),
                              lambda t: t.c.id == address_id).first()
        email_address = EmailAddress()
        if record is not None:
            # Only ``name`` is selected, so only ``name`` can be copied.
            email_address.name = record.name
        return email_address

    # ------------------------------------------------------------------
    # BRUSHED_EMAIL table
    # ------------------------------------------------------------------
    def count_per_label(self, label):
        """Return how many ``brushed_email`` rows carry the integer ``label``."""
        # ``%d`` only accepts a number, so no arbitrary SQL text can reach
        # the statement through ``label``.
        rp = self.engine.execute(
            'select count(label) from brushed_email where label=%d' % (label))
        return int(rp.first()[0])

    def get_all_brushed_labels_with_id(self):
        """Return ``(id, label)`` for every ``brushed_email`` row."""
        return [(record.id, record.label)
                for record in self._select('brushed_email', ('id', 'label'))]

    def get_all_brushed_bodies_with_id(self):
        """Return ``(id, body)`` for every ``brushed_email`` row."""
        return [(record.id, record.body)
                for record in self._select('brushed_email', ('id', 'body'))]

    def get_all_brushed_body_summary_with_id(self):
        """Return ``(id, subject, body, summary)`` for every ``brushed_email`` row."""
        rows = self._select('brushed_email', ('id', 'subject', 'body', 'summary'))
        return [(record.id, record.subject, record.body, record.summary)
                for record in rows]

    def get_all_brushed_lines_with_id(self):
        """Return ``(id, lines)`` for every ``brushed_email`` row."""
        return [(record.id, record.lines)
                for record in self._select('brushed_email', ('id', 'lines'))]

    def get_all_brushed_verbs_with_id(self):
        """Return ``(id, verbs)`` for every ``brushed_email`` row."""
        return [(record.id, record.verbs)
                for record in self._select('brushed_email', ('id', 'verbs'))]

    def get_all_brushed_verbs_per_label(self, label):
        """Return ``(id, verbs)`` for the ``brushed_email`` rows with ``label``."""
        rows = self._select('brushed_email', ('id', 'verbs'),
                            lambda t: t.c.label == label)
        return [(record.id, record.verbs) for record in rows]

    def get_all_one_liners_per_label(self, label):
        """Return ``(id, one_line)`` for the ``brushed_email`` rows with ``label``."""
        rows = self._select('brushed_email', ('id', 'one_line'),
                            lambda t: t.c.label == label)
        return [(record.id, record.one_line) for record in rows]

    def update_brushed_body(self, email_id, body):
        """Set ``body`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, body=body)

    def update_brushed_lines(self, email_id, msg_lines):
        """Set ``lines`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, lines=msg_lines)

    def update_brushed_one_line(self, email_id, one_line):
        """Set ``one_line`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, one_line=one_line)

    def update_brushed_verbs(self, email_id, verbs):
        """Set ``verbs`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, verbs=verbs)

    def update_brushed_summary(self, email_id, summary):
        """Set ``summary`` on one ``brushed_email`` row."""
        self._update_by_id('brushed_email', email_id, summary=summary)

    # ------------------------------------------------------------------
    # additional dataset, out of the labelled data
    # ------------------------------------------------------------------
    def insert_brushed_email_more(self, email):
        """Insert ``email`` into ``brushed_email_more``.

        Prints an error and returns without inserting when ``email`` is not
        an ``Email`` instance.
        """
        if not isinstance(email, Email):
            print('ERROR: input must be of type Email')
            return

        # NOTE(review): ``raw_body`` is filled from ``email.body`` here, while
        # insert_cleaned_email_full uses ``email.raw_body`` -- confirm intent.
        self._execute_write(self._table("brushed_email_more").insert(),
                            date=email.date,
                            mime_type=email.mime_type,
                            from_addr=email.from_addr,
                            to_addr=email.to_addr,
                            subject=email.subject,
                            raw_body=email.body,
                            body=email.body,
                            all_lines=email.all_lines,
                            one_line=email.one_line,
                            path=email.path,
                            label=email.label,
                            prediction=email.prediction)

    def insert_cleaned_email_full(self, email):
        """Insert ``email`` into ``email_full``.

        Prints an error and returns without inserting when ``email`` is not
        an ``Email`` instance.
        """
        if not isinstance(email, Email):
            print('ERROR: input must be of type Email')
            return

        self._execute_write(self._table("email_full").insert(),
                            date=email.date,
                            mime_type=email.mime_type,
                            from_addr=email.from_addr,
                            to_addr=email.to_addr,
                            subject=email.subject,
                            raw_body=email.raw_body,
                            body=email.body,
                            all_lines=email.all_lines,
                            one_line=email.one_line,
                            path=email.path)

    def get_raw_bodies_with_id(self):
        """Return ``(id, raw_body)`` for every ``email_full`` row."""
        return [(record.id, record.raw_body)
                for record in self._select('email_full', ('id', 'raw_body'))]

    def update_brushed_body_full(self, email_id, body):
        """Set ``body`` on one ``email_full`` row."""
        self._update_by_id('email_full', email_id, body=body)

    def update_brushed_lines_full(self, email_id, msg_lines):
        """Set ``all_lines`` on one ``email_full`` row."""
        self._update_by_id('email_full', email_id, all_lines=msg_lines)

    def get_all_brushed_lines_with_id_full(self):
        """Return ``(id, all_lines)`` for every ``email_full`` row."""
        return [(record.id, record.all_lines)
                for record in self._select('email_full', ('id', 'all_lines'))]

    def update_brushed_one_line_full(self, email_id, one_line):
        """Set ``one_line`` on one ``email_full`` row."""
        self._update_by_id('email_full', email_id, one_line=one_line)

    def get_email_full(self, email_id):
        """Return ``(id, raw_body)`` of the ``email_full`` row with the given id.

        :raises IndexError: when no row has that id.
        """
        rows = self._select('email_full', ('id', 'raw_body'),
                            lambda t: t.c.id == email_id)
        bodies = [(record.id, record.raw_body) for record in rows]
        return bodies[0]

    def get_all_brushed_emails_full(self):
        """Return every ``email_full`` row as an ``Email``."""
        return self._emails_with_scheduling('email_full', self._FULL_LIST_COLUMNS)

    def get_all_brushed_email_more(self):
        """Return every ``brushed_email_more`` row as an ``Email``."""
        return self._emails_with_scheduling('brushed_email_more',
                                            self._FULL_LIST_COLUMNS)
        
Exemple #18
0
class DatabaseHolder(object):
    """
    Holds the SQLAlchemy objects for one named database: the engine, an
    optional raw connection, an optional ORM session, and metadata that is
    reflected lazily on first use.
    """
    def __init__(self,
                 name: str,
                 url: str,
                 srccfg: DB_SAFE_CONFIG_FWD_REF = None,
                 with_session: bool = False,
                 with_conn: bool = True,
                 reflect: bool = True,
                 encoding: str = 'utf-8',
                 echo: bool = False) -> None:
        """
        Args:
            name: internal database name
            url: SQLAlchemy URL
            srccfg: :class:`crate_anon.anonymise.config.DatabaseSafeConfig`
            with_session: create an SQLAlchemy Session?
            with_conn: create an SQLAlchemy connection (via an Engine)?
            reflect: read the database structure (when required)?
            encoding: passed to SQLAlchemy's :func:`create_engine`
            echo: passed to SQLAlchemy's :func:`create_engine`
        """
        # Plain bookkeeping first; nothing here touches the database.
        self.name = name
        self.srccfg = srccfg
        self._reflect_on_request = reflect
        self._reflected = False
        self._table_names = []  # type: List[str]

        self.engine = create_engine(url, encoding=encoding, echo=echo)
        self._metadata = MetaData(bind=self.engine)
        log.debug(self.engine)  # obscures password

        # Raw connection, only when asked for.
        self.conn = (
            self.engine.connect() if with_conn else None
        )  # type: Optional[Connection]
        # ORM session, only when asked for.
        self.session = (
            sessionmaker(bind=self.engine)() if with_session else None
        )  # type: Optional[Session]

    def _reflect(self) -> None:
        """
        Reflect the database structure into the metadata, unless reflection
        was disabled at construction.

        Reflection is expensive, which is why it is deferred until needed.
        """
        if self._reflect_on_request:
            log.info(f"Reflecting database: {self.name}")
            self._metadata.reflect(views=True)  # include views
            self._table_names = [
                table.name for table in self._metadata.sorted_tables
            ]
            self._reflected = True

    def update_metadata(self) -> None:
        """
        Replaces the metadata with a fresh, empty one bound to the engine,
        for example after a table has been dropped.
        """
        # NOTE(review): ``_reflected`` is left unchanged, so a holder that was
        # already reflected will not re-reflect automatically -- confirm that
        # this is intended.
        self._metadata = MetaData(bind=self.engine)

    @property
    def metadata(self) -> MetaData:
        """
        The SQLAlchemy :class:`MetaData`. When reflection is enabled and has
        not happened yet, the database is reflected first.
        """
        if self._reflected:
            return self._metadata
        self._reflect()
        return self._metadata

    @property
    def table_names(self) -> List[str]:
        """
        The table names from the database when reflection is enabled
        (otherwise an empty list).
        """
        if self._reflected:
            return self._table_names
        self._reflect()
        return self._table_names
Exemple #19
0
class DBMS:
    """Implementation Philosophy:
    * Always use the SQLAlchemy API and avoid SQL-dialect-specific language.
    * Engine is provided externally. It is the end-user's business to make this engine.
    """
    def __init__(self, engine, db=None, sch=None, vws=False):
        """Wrap an externally created engine and load its schema information.

        :param engine: SQLAlchemy engine supplied by the caller
        :param db: stored on the instance as ``self.db``
        :param sch: default schema name used when a method is given none
        :param vws: stored on the instance as ``self.vws``
        """
        # Caller-supplied settings.
        self.eng = engine
        self.db, self.sch, self.vws = db, sch, vws

        # Placeholders that refresh() fills in.
        self.insp = self.schema = self.tables = self.views = None
        self.sch_tab = self.sch_vws = None

        self.path = tb.P(self.eng.url.database)
        self.con = self.eng.connect()
        session_factory = sessionmaker()
        self.ses = session_factory(bind=self.eng)  # ORM style
        self.meta = MetaData()
        self.refresh()

    def close(self):
        self.eng
        self.con.close()
        self.ses.close()

    def refresh(self, sch=None):
        """Re-reflect the metadata and rebuild the schema/table/view listings.

        :param sch: schema to reflect; falls back to ``self.sch`` when falsy
        :return: ``self``, so calls can be chained
        """
        # Per the original author: fails if multiple schemas are there and
        # None is specified.
        target_schema = sch or self.sch
        self.meta.reflect(bind=self.eng, schema=target_schema)
        self.insp = inspect(subject=self.eng)

        # Every schema name reported by the inspector, plus None.
        self.schema = tb.L(self.insp.get_schema_names())
        self.schema.append(None)

        def tables_of(schema_name):
            return self.insp.get_table_names(schema=schema_name)

        def views_of(schema_name):
            return self.insp.get_view_names(schema=schema_name)

        self.tables = self.schema.apply(tables_of)
        self.views = self.schema.apply(views_of)
        self.sch_tab = tb.Struct.from_keys_values(self.schema, self.tables)
        self.sch_vws = tb.Struct.from_keys_values(self.schema, self.views)

        return self

    @classmethod
    def from_local_db(cls, path=None, echo=False):
        """Alternate constructor: build the engine with ``make_sql_db`` first."""
        local_engine = cls.make_sql_db(path, echo)
        return cls(engine=local_engine)

    def __repr__(self):
        return f"DataBase @ {self.eng}"

    @staticmethod
    def make_sql_db(path=None,
                    echo=False,
                    dialect="sqlite",
                    driver=["pysqlite", "DBAPI"][0]):
        """Create an engine for a file-based or in-memory database.

        :param path: database file; ``"memory"`` selects an in-memory
                     database, ``None`` a fresh temporary ``.db`` file
        :param echo: passed to ``create_engine``; shorthand for logging the
                     emitted SQL
        :param dialect: dialect part of the engine URL
        :param driver: driver part of the engine URL
        :return: the engine (core style; use in conjunction with connect)
        """
        if path == "memory":
            location = ":memory:"
        else:
            if path is None:
                path = tb.P.tmpfile(folder="dbs", suffix=".db")
            print(f"Linking to database at {tb.P(path).as_uri()}")
            location = path
        return create_engine(url=f"{dialect}+{driver}:///{location}",
                             echo=echo,
                             future=True)

    # ==================== QUERIES =====================================
    def execute_as_you_go(self, *commands, res_func=lambda x: x.all()):
        with self.eng.connect() as conn:
            for command in commands:
                result = conn.execute(text(command))
            conn.commit(
            )  # if driver is sqlite3, the connection is autocommitting.
            # this commit is only needed in case of DBAPI driver.
        return res_func(result)

    def execute_begin_once(self, command, res_func=lambda x: x.all()):
        """Run one SQL command inside ``engine.begin()`` and post-process it.

        :param command: SQL string
        :param res_func: callable applied to the result while the transaction
                         is still open; defaults to fetching all rows
        :return: whatever ``res_func`` returns
        """
        # no need for commit regardless of driver
        with self.eng.begin() as conn:
            return res_func(conn.execute(text(command)))

    def execute(self, command):
        """Run one SQL command inside ``engine.begin()`` and return the raw result.

        NOTE(review): the result is handed back after the transaction block
        has ended, so rows may no longer be fetchable -- confirm callers only
        use this for statements whose rows are not needed.
        """
        with self.eng.begin() as transaction_conn:
            return transaction_conn.execute(text(command))

    def _get_table_identifier(self, table, sch):
        if sch is None: sch = self.sch
        if sch is not None:
            return sch + "." + table
        else:
            return table

    # ========================== TABLES =====================================
    def read_table(self, table, sch=None, size=100):
        """Return up to ``size`` rows of ``table`` via the shared connection.

        :param table: table name
        :param sch: schema name; ``self.sch`` is used when None
        :param size: maximum number of rows to fetch
        """
        identifier = self._get_table_identifier(table, sch)
        query = text(f"""SELECT * FROM {identifier}""")
        return self.con.execute(query).fetchmany(size)

    def make_df(self, table_name, records=None, schema=None):
        self.meta.reflect(bind=self.eng, schema=schema or self.sch)
        table = self.meta.tables[table_name]
        res = pd.DataFrame(records or self.ses.query(table).all(),
                           columns=table.exported_columns.keys())
        # the following spits an error if sqlalchemy is 2.0
        # df = pd.read_sql_table(table, con=self.eng, schema=schema or self.sch)
        return res

    def get_columns(self, table, sch=None):
        return self.meta.tables[self._get_table_identifier(
            table, sch)].exported_columns.keys()

    def insert_dicts(self, table, *mydicts):
        cmd = f"""INSERT INTO {table} VALUES """
        for mydict in mydicts:
            cmd += f"""({tuple(mydict)}), """
        self.execute_begin_once(cmd)

    def describe_table(self, table, sch=None, dtype=True):
        """Print a summary of ``table``: row count, size estimate and a sample.

        :param table: table name; also used directly as the key into
                      ``self.meta.tables``
        :param sch: schema passed on to ``read_table`` and ``get_columns``
        :param dtype: when true, also print the inspector's column details
        """
        # Banner: the table name centred in a 100-character line of '='.
        print(table.center(100, "="))
        self.refresh()
        # NOTE(review): this lookup uses the bare name, whereas get_columns()
        # below uses the schema-qualified identifier -- confirm both resolve
        # when a schema is in use.
        tbl = self.meta.tables[table]
        count = self.ses.query(tbl).count()
        # size_mb is a rough figure: rows * columns * 10, scaled by 1e6
        # (presumably assuming ~10 bytes per cell -- TODO confirm).
        res = tb.Struct(name=table,
                        count=count,
                        size_mb=count * len(tbl.exported_columns) * 10 / 1e6)
        res.print(dtype=False, config=True)
        # Two rows are enough for a sample.
        dat = self.read_table(table=table, sch=sch, size=2)
        cols = self.get_columns(table, sch=sch)
        df = pd.DataFrame.from_records(dat, columns=cols)
        print("SAMPLE:\n", df)
        if dtype:
            print("\n")
            print("DETAILED COLUMNS:\n",
                  tb.pd.DataFrame(self.insp.get_columns(table)))
            # print("DETAILED COLUMNS:\n", list(self.meta.tables[self._get_table_identifier(table, sch)].columns))
        print("\n" * 3)