Example #1
def add_index(engine: Engine, table_name: str, column: sa.Column):
    """Create an index based on the column index definition

    calling the compiled SQL statement:

        CREATE INDEX index_name
        ON table_name (column_name)

    :param engine: the bound sql database engine
    :param table_name: the name of the table with the column
    :param column: the instantiated column definition

    :return: - nothing -

    """

    c_table_name = _compile_name(table_name, dialect=engine.dialect)

    c_column_name = column.compile(dialect=engine.dialect)

    index_name = "ix_%s_%s" % (table_name, column.name)
    c_index_name = _compile_name(index_name, dialect=engine.dialect)

    engine.execute('CREATE INDEX %s ON %s ( %s )' %
                   (c_index_name, c_table_name, c_column_name))
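A minimal usage sketch for this helper, assuming a SQLAlchemy 1.x engine (where Engine.execute is still available); the table, column, and URL here are illustrative:

import sqlalchemy as sa
from sqlalchemy import create_engine

engine = create_engine("sqlite:///example.db")  # hypothetical connection URL
users = sa.Table("users", sa.MetaData(),
                 sa.Column("id", sa.Integer, primary_key=True),
                 sa.Column("email", sa.String(255)))
users.create(engine, checkfirst=True)

add_index(engine, "users", users.c.email)  # creates "ix_users_email"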
Example #2
    def __to_db(self,
                dataframe: DataFrame,
                conn: Engine,
                params,
                **kwargs) -> str:
        table_name = params.get("table_name")
        batch_size = params.get("batch_size")
        mode = params.get("mode", 'append')
        index_flag = params.get("index")
        index_label = params.get("index_label")

        try:
            if mode == 'truncate' and conn.has_table(table_name=table_name):
                conn.execution_options(autocommit=True)\
                    .execute(f"""TRUNCATE TABLE {table_name}""")
            dataframe.to_sql(con=conn,
                             name=table_name,
                             if_exists=self.modes.get(mode),
                             chunksize=batch_size,
                             index=index_flag,
                             index_label=index_label,
                             **kwargs)
        except Exception as err:
            msg = ("Error: Check your credentials (username,"
                   " password, host, port, database)\n")
            raise ValueError(msg, err)
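The snippet relies on a self.modes lookup that is not shown; a plausible mapping from the caller-facing mode to pandas' if_exists values might look like this (an assumption, not the original definition):

# Hypothetical definition, e.g. set in the class __init__:
self.modes = {
    "append": "append",      # add rows to the existing table
    "overwrite": "replace",  # drop and re-create the table
    "truncate": "append",    # table is emptied above, then appended to
}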
Example #3
    def __init__(self, pool, dialect, u, single_worker=True, **kwargs):
        if single_worker:
            worker = ExecutorThread()
        else:
            worker = None
        self._worker = worker
        self._engine = Engine(pool, dialect, u, **kwargs)
Example #4
def iterate_csv_dir(db_engine: engine.Engine = None):

    current_dir = os.getcwd()
    db_config = config.DB()

    for _, services_dir, _ in os.walk(f'{current_dir}/csv'):
        for service_db in services_dir:
            db_engine = create_engine(db_config.print())

            db_connection = db_engine.raw_connection()

            try:
                db_cursor = db_connection.cursor()
                db_cursor.execute(
                    f"SELECT 1 FROM pg_catalog.pg_database WHERE datname = '{service_db}'"
                )
                db_exists = db_cursor.fetchone()
                if not db_exists:
                    db_cursor.execute(f'CREATE DATABASE {service_db}')
                db_cursor.close()
                db_connection.commit()

            finally:
                db_connection.close()
                db_engine.dispose()

            db_engine = create_engine(db_config.print(service_db))

            for filename in os.listdir(f'{current_dir}/csv/{service_db}'):
                filename = os.path.splitext(filename)[0]
                from_csv_to_db(service_db, filename, db_engine)

            db_engine.dispose()
Example #5
def execute(engine: Engine, sql: str) -> None:
    log.debug(sql)
    if _print_not_execute:
        print(format_sql_for_print(sql) + "\n;")
        # extra \n in case the SQL ends in a comment
    else:
        engine.execute(sql)
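_print_not_execute and format_sql_for_print are module-level names that are not shown here; a minimal stand-in for the formatter, assuming the sqlparse package is available, could be:

import sqlparse

_print_not_execute = False  # flip to True to dry-run: print SQL instead of executing

def format_sql_for_print(sql: str) -> str:
    # Re-indent and uppercase keywords for readable console output.
    return sqlparse.format(sql, reindent=True, keyword_case="upper")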
Example #6
    def delete(cls, db_engine: engine.Engine, email: str):

        query = """
        DELETE FROM {}.{} WHERE email = '{}'
        """.format(PATT_SCHEMA_NAME, cls.__tablename__, email)

        db_engine.execute(query)
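Interpolating email directly into the statement is vulnerable to SQL injection; a parameterized variant (a sketch using sqlalchemy.text, with the identifiers still formatted in because placeholders cannot bind table or schema names) would be:

from sqlalchemy import text

query = text("DELETE FROM {}.{} WHERE email = :email".format(
    PATT_SCHEMA_NAME, cls.__tablename__))
db_engine.execute(query, email=email)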
Example #7
    def __init__(self,
                 pool,
                 dialect,
                 url,
                 logging_name=None,
                 echo=None,
                 execution_options=None,
                 loop=None,
                 **kwargs):

        self._engine = Engine(pool,
                              dialect,
                              url,
                              logging_name=logging_name,
                              echo=echo,
                              execution_options=execution_options,
                              **kwargs)

        self._loop = loop

        max_workers = None

        # https://www.python.org/dev/peps/pep-0249/#threadsafety
        if dialect.dbapi.threadsafety < 2:
            # This might seem overly-restrictive, but when we instantiate an
            # AsyncioResultProxy from AsyncioEngine.execute, subsequent
            # fetchone calls could be in different threads. Let's limit to one.
            max_workers = 1

        self._engine_executor = ThreadPoolExecutor(max_workers=max_workers)
Example #8
def create_tables_if_not_existing(engine: Engine):
    metadata = MetaData()
    metadata.bind = engine

    tablename_jira_issue_created = 'JiraIssueCreated'
    tablename_jira_issue_updated = 'JiraIssueUpdated'
    try:
        if not (engine.has_table(tablename_jira_issue_created)
                and engine.has_table(tablename_jira_issue_updated)):
            issue_created = Table(
                tablename_jira_issue_created, metadata,
                Column('issue', String(32), primary_key=True, nullable=False),
                Column('created', TIMESTAMP, nullable=False))
            issue_updated = Table(
                tablename_jira_issue_updated, metadata,
                Column('id',
                       BIGINT(),
                       Sequence('id', start=1, increment=1),
                       primary_key=True),
                Column('issue',
                       String(32),
                       ForeignKey(issue_created.c.issue),
                       nullable=False),
                Column('updated', TIMESTAMP, nullable=False),
                Column('issue_status', String(32)),
                Column('customer', String(200)))
            metadata.create_all()
    finally:
        engine.dispose()
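Note that MetaData.create_all already checks each table's existence before creating it (checkfirst=True is the default), so the explicit has_table guard above mainly avoids building the Table objects when both tables exist; a shorter variant could simply be:

# create_all skips tables that already exist, so the guard above is optional.
metadata.create_all(engine, checkfirst=True)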
Example #9
    def upsert_admin(cls, db_engine: engine.Engine) -> None:

        db_config = Config()
        query = """
                INSERT INTO {0}.{1} 
                    (email, password, status, role, created_at, updated_at)
                    VALUES ('{2}', '{3}', '{4}', '{5}', '{6}', '{7}')
                ON CONFLICT (email)
                DO UPDATE SET 
                    email = excluded.email,
                    password = excluded.password,
                    status = excluded.status,
                    role = excluded.role,
                    updated_at = excluded.updated_at
                    """.format(
            PATT_SCHEMA_NAME,
            cls.__tablename__,
            db_config.admin_email,
            Bcrypt().generate_password_hash(
                db_config.admin_password).decode("utf-8"),
            UserStatuses.confirmed,
            Roles.admin,
            datetime.utcnow(),
            datetime.utcnow(),
        )
        db_engine.execute(query)
Example #10
def check_or_update_odc_schema(engine: Engine):
    """
    Check that the ODC schema is updated enough to run Explorer,
    and either update it safely (if we have permission), or tell the user how.
    """
    # We need the `update` column on ODC's dataset table in order to run incremental product refreshes.
    try:
        # We can try to install it ourselves if we have permission, using ODC's code.
        if not pg_column_exists(engine, ODC_DATASET.fullname, "updated"):
            _LOG.warn("schema.applying_update.add_odc_change_triggers")
            _utils.install_timestamp_trigger(engine)
    except ProgrammingError as e:
        # We don't have permission.
        raise SchemaNotRefreshable(
            dedent("""
            Missing update triggers.

            No dataset-update triggers are installed on the ODC instance, and Explorer does
            not have enough permissions to add them itself.

            It's recommended to run `datacube system init` on your ODC instance to install them.

            Then try this again.
        """)) from e

    # Add optional indexes to AGDC if we have permission.
    # (otherwise we warn the user that it may be slow, and how to add it themselves)
    statements = []
    try:
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name,
                               "ix_dataset_added"):
            _LOG.warn("schema.applying_update.add_odc_added_index")
            statements.append(
                f"create index ix_dataset_added on {ODC_DATASET.fullname}(added desc);"
            )
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name,
                               "ix_dataset_type_changed"):
            _LOG.warn("schema.applying_update.add_odc_changed_index")
            statements.append(
                f"create index ix_dataset_type_changed on "
                f"{ODC_DATASET.fullname}(dataset_type_ref, greatest(added, updated, archived) desc);"
            )
        while statements:
            engine.execute(statements[-1])
            statements.pop()
    except ProgrammingError:
        unexecuted_sql = "\n                ".join(statements)
        warnings.warn(
            dedent(f"""
            No recently-added index.
            Explorer recommends adding an index for recently-added datasets to your ODC,
            but does not have permission to add it to the current ODC database.

            It's recommended to add it manually in Postgres:

                {unexecuted_sql}
        """))
        raise
Example #11
def drop_all(engine: Engine) -> None:
    from quiz_bot.db.base import metadata

    click.echo('Dropping schema...')
    for table in metadata.tables:
        engine.execute(f'DROP TABLE IF EXISTS "{table}" CASCADE')
    metadata.drop_all()
    click.echo('Schema successfully dropped!')
Example #12
def update_schema(engine: Engine):
    """Update the schema if needed."""
    if not pg_column_exists(engine, f"{CUBEDASH_SCHEMA}.product",
                            "fixed_metadata"):
        _LOG.info("schema.applying_update.add_fixed_metadata")
        engine.execute(f"""
        alter table {CUBEDASH_SCHEMA}.product add column fixed_metadata jsonb
        """)
Example #13
def establish_connection(bind: engine.Engine) -> engine.Engine:
    for _ in range(100):
        try:
            bind.connect()
            break
        except exc.OperationalError:
            time.sleep(0.05)
    return bind
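A usage sketch, e.g. in a test fixture that waits for a containerized database to come up (the URL is illustrative):

from sqlalchemy import create_engine

# Retries connect() for up to ~5 seconds (100 * 0.05s) before giving up.
engine = establish_connection(create_engine("postgresql://user:pw@localhost/test"))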
Example #14
def does_postgres_accept_connection(engine: Engine) -> bool:
    """ Test if the target PostgreSQL database accept connexions
    """
    try:
        engine.connect()
    except OperationalError:
        return False
    else:
        return True
Example #15
def create_index(engine: Engine, directory: str):
    try:
        print("Indexing...")
        engine.execute(
            f"CREATE INDEX {directory}_index ON {directory} (term, year)")
        print(f"Index created for {directory}")
    except NoSuchTableError:
        print(
            f"Index creation failed because table '{directory}' does not exist. "
            f"Check if directory '{directory}' is empty.")
Example #16
def delete_trigger(name: str, engine: Engine, table: str, schema: str = None):
    """Delete a trigger context (if exists) on database.

    Args:
        name (str): The trigger name.
        engine (Engine): The SQLAlchemy active database engine.
        table (str): The table name.
        schema (str): The table schema that the trigger is attached.
    """
    schema = schema or 'public'

    engine.execute(f'DROP TRIGGER IF EXISTS {name} ON {schema}.{table}')
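Called, for example, before re-creating a trigger during a schema refresh (the names here are illustrative):

# Drops the trigger on public.my_table if present; no error when it is absent.
delete_trigger("update_collection_trigger", engine, "my_table")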
Example #17
def wait_for_postgres(engine: Engine, max_waiting_time: int = 10):
    logging.info('Waiting until PostgreSQL accepts connections')
    for i in range(max_waiting_time):
        if does_postgres_accept_connection(engine):
            logging.info('PostgreSQL is ready to accept connections')
            return
        logging.info(
            'PostgreSQL is not ready to accept connections, waiting {} more seconds'
            .format(max_waiting_time - i))
        sleep(1)

    engine.connect()  # Raise exception
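This builds on does_postgres_accept_connection from Example #14; a typical startup sequence might be (URL illustrative):

from sqlalchemy import create_engine

engine = create_engine("postgresql://user:pw@db:5432/app")
wait_for_postgres(engine, max_waiting_time=30)
# From here on the database is reachable (or connect() above has raised).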
Example #18
def change_column_length(table: Table, column: Column, length: int,
                         engine: Engine) -> None:
    """ Change the column length in the supplied table
    """
    if column.type.length < length:
        print("Changing length of {} from {} to {}".format(
            column, column.type.length, length))
        column.type.length = length
        column_name = column.name
        column_type = column.type.compile(engine.dialect)
        engine.execute(
            'ALTER TABLE {table} ALTER COLUMN {column_name} TYPE {column_type}'
            .format(**locals()))
Example #19
def _insert_data(engine: Engine, table: Union[Table, Base],
                 buffer: List[Dict[str, Any]]) -> None:
    """
    Inserts all records stored in buffer into the specified table using the
    specified engine. Does nothing if buffer is empty.

    :param engine: Database engine.
    :param table: Database table, records are inserted into.
    :param buffer: List of new data to be inserted.
    """
    if len(buffer) > 0:
        if isinstance(table, Table):
            engine.execute(table.insert(), buffer)
        else:
            engine.execute(table.__table__.insert(), buffer)
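A usage sketch of the buffering pattern this helper supports (source_rows and my_table are hypothetical):

buffer: List[Dict[str, Any]] = []
for record in source_rows:              # hypothetical iterable of row dicts
    buffer.append(record)
    if len(buffer) >= 1000:             # flush in batches of 1000
        _insert_data(engine, my_table, buffer)
        buffer.clear()
_insert_data(engine, my_table, buffer)  # flush the remainder (no-op if empty)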
Example #20
def get_tables(connection: Engine) -> pd.DataFrame:
    dfs = []
    connection.execute(f"USE WAREHOUSE {warehouse};")
    query = ("SELECT TABLE_CATALOG, TABLE_SCHEMA, "
             "concat(TABLE_CATALOG,'_', TABLE_SCHEMA) as SCHEMA_ID, "
             "TABLE_NAME, concat(schema_id,'_',TABLE_NAME) as TABLE_ID, "
             "ROW_COUNT, CREATED, LAST_ALTERED "
             "FROM information_schema.TABLES "
             "WHERE TABLE_SCHEMA NOT IN ('PUBLIC', 'INFORMATION_SCHEMA');")
    for db in DATABASES:
        connection.execute(f"USE DATABASE {db};")
        df = pd.read_sql(query, connection)
        dfs.append(df)
    df = pd.concat(dfs, ignore_index=True)
    return df
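warehouse and DATABASES are module-level names not shown in the snippet; plausible (hypothetical) definitions for a Snowflake setup would be:

# Hypothetical module-level configuration assumed by get_tables.
warehouse = "COMPUTE_WH"
DATABASES = ["ANALYTICS", "RAW"]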
Example #21
def load_charges(engine: Engine,
                 npartitions: int = None,
                 clear_existing: bool = False):
    ddf = read_raw_data(npartitions)
    ddf = clean_data(ddf)

    if clear_existing:
        logger.info("Clearing any existing expungement data")
        for table in [runs, charges, features, outcomes]:
            logger.info(f"Deleting from: {table.name}")
            engine.execute(f"""
                DELETE FROM {table.name}
            """)

    load_to_db(ddf, target_table=charges, engine=engine, include_index=False)
Example #22
def column_reflection_fallback(
        selectable: Select, dialect: Dialect,
        sqlalchemy_engine: Engine) -> List[Dict[str, str]]:
    """If we can't reflect the table, use a query to at least get column names."""
    col_info_dict_list: List[Dict[str, str]]
    if dialect.name.lower() == "mssql":
        # Get column names and types from the database
        # Reference: https://dataedo.com/kb/query/sql-server/list-table-columns-in-database
        columns_query: str = f"""
SELECT
    SCHEMA_NAME(tab.schema_id) AS schema_name,
    tab.name AS table_name, 
    col.column_id AS column_id,
    col.name AS column_name, 
    t.name AS column_data_type,    
    col.max_length AS column_max_length,
    col.precision AS column_precision
FROM sys.tables AS tab
    INNER JOIN sys.columns AS col
    ON tab.object_id = col.object_id
    LEFT JOIN sys.types AS t
    ON col.user_type_id = t.user_type_id
WHERE tab.name = '{selectable}'
ORDER BY schema_name,
    table_name, 
    column_id
"""
        col_info_query: TextClause = sa.text(columns_query)
        col_info_tuples_list: List[tuple] = sqlalchemy_engine.execute(
            col_info_query).fetchall()
        # type_module = _get_dialect_type_module(dialect=dialect)
        col_info_dict_list: List[Dict[str, str]] = [
            {
                "name": column_name,
                # "type": getattr(type_module, column_data_type.upper())(),
                "type": column_data_type.upper(),
            } for schema_name, table_name, column_id, column_name,
            column_data_type, column_max_length, column_precision in
            col_info_tuples_list
        ]
    else:
        query: Select = sa.select([sa.text("*")
                                   ]).select_from(selectable).limit(1)
        result_object = sqlalchemy_engine.execute(query)
        # noinspection PyProtectedMember
        col_names: List[str] = result_object._metadata.keys
        col_info_dict_list = [{"name": col_name} for col_name in col_names]
    return col_info_dict_list
Example #23
    def _check_usage_date_ranges(self, engine: Engine) -> Any:

        query = """
            select
                min(query_start_time) as min_time,
                max(query_start_time) as max_time
            from snowflake.account_usage.access_history
        """
        with PerfTimer() as timer:
            try:
                for db_row in engine.execute(query):
                    if len(db_row) < 2 or db_row[0] is None or db_row[1] is None:
                        self.warn(
                            logger,
                            "check-usage-data",
                            f"Missing data for access_history {db_row} - Check if using Enterprise edition of Snowflake",
                        )
                        continue
                    self.report.min_access_history_time = db_row[0].astimezone(
                        tz=timezone.utc
                    )
                    self.report.max_access_history_time = db_row[1].astimezone(
                        tz=timezone.utc
                    )
                    self.report.access_history_range_query_secs = round(
                        timer.elapsed_seconds(), 2
                    )
            except Exception as e:
                self.error(logger, "check-usage-data", f"Error was {e}")
Example #24
def _populate_missing_dataset_extents(engine: Engine, product: DatasetType):
    query = (
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            [
                "id",
                "dataset_type_ref",
                "center_time",
                "footprint",
                "region_code",
                "size_bytes",
                "creation_time",
            ],
            _select_dataset_extent_query(product),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    )

    _LOG.debug(
        "spatial_insert_query.start",
        product_name=product.name,
        # query_sql=as_sql(query),
    )
    inserted = engine.execute(query).rowcount
    _LOG.debug("spatial_insert_query.end", product_name=product.name, inserted=inserted)
    return inserted
Example #25
def datasets_by_region(
    engine: Engine,
    index: Index,
    product_name: str,
    region_code: str,
    time_range: Range,
    limit: int,
    offset: int = 0,
) -> Generator[Dataset, None, None]:
    product = index.products.get_by_name(product_name)
    query = (select(postgres_api._DATASET_SELECT_FIELDS).select_from(
        DATASET_SPATIAL.join(
            DATASET, DATASET_SPATIAL.c.id == DATASET.c.id)).where(
                DATASET_SPATIAL.c.region_code == bindparam(
                    "region_code", region_code)).where(
                        DATASET_SPATIAL.c.dataset_type_ref == bindparam(
                            "dataset_type_ref", product.id)))
    if time_range:
        query = query.where(DATASET_SPATIAL.c.center_time > bindparam(
            "from_time", time_range.begin)).where(
                DATASET_SPATIAL.c.center_time < bindparam(
                    "to_time", time_range.end))
    query = (query.order_by(DATASET_SPATIAL.c.center_time).limit(
        bindparam("limit", limit)).offset(bindparam("offset", offset)))

    return (index.datasets._make(res, full_info=True)
            for res in engine.execute(query).fetchall())
Example #26
def get_page_views(engine: Engine, mode: str = 'current') -> Union[int, None]:
    """Get page views for current date or all

    :param engine: SQLAlchemy engine object
    :param mode: page view aggregation method ('current', 'all')
    """

    # SQL query return placeholder
    result = []

    # get page views from database
    with engine.connect() as conn:
        if mode == 'all':
            result = conn.execute('SELECT total(page_views) FROM stats')
        elif mode == 'current':
            result = conn.execute('SELECT page_views FROM stats'
                                  ' WHERE tick_date = ?', date_now())

        # unpack results into list of JSON records
        result = [dict(row) for row in result]

        # check results and return sanitized value
        if len(result) > 0:
            return int(list(result[0].values())[0])
        else:
            return None
Example #27
def load_svt_extra_db(
        engine: Engine,
        svtExtras: list[MstSvtExtra]) -> None:  # pragma: no cover
    svtExtra_db_data = [svtExtra.dict() for svtExtra in svtExtras]
    with engine.begin() as conn:
        recreate_table(conn, mstSvtExtra)
        conn.execute(mstSvtExtra.insert(), svtExtra_db_data)
Example #28
def populate(engine: Engine, data_path: str):
    conn = engine.raw_connection()
    cursor = conn.cursor()

    # keep
    insert_reviews = """
        INSERT INTO review (
        ID, username, movie_id, review, rating)
        VALUES (?, ?, ?, ?, ?)"""
    cursor.executemany(insert_reviews, generic_generator(os.path.join(data_path, 'reviews.csv')))

    # keep
    insert_users = """
            INSERT INTO users (
            ID, username, password)
            VALUES (?, ?, ?)"""
    cursor.executemany(insert_users, generic_generator(os.path.join(data_path, 'users.csv')))

    # keep
    insert_movies = """
        INSERT INTO movies (
        Rank, Title, Genre, Description, Director, Actors, Year, Runtime, Rating, Votes, Revenue, Metascore)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    cursor.executemany(insert_movies, generic_generator(os.path.join(data_path, 'Data1000Movies.csv')))
    # sqlite3.OperationalError: near "(": syntax error

    conn.commit()
    conn.close()
Example #29
    def write_to_db(self, eng: Engine):
        conn = eng.raw_connection()
        cursor = conn.cursor()

        try:
            cursor.callproc("create_class_section",
                            [self.crn,
                             self.class_dept,
                             self.class_number,
                             self.professor,
                             self.capacity,
                             self.registered,
                             self.semester_id])
            for meeting_time in self.meeting_times:
                meeting_time.write_to_db(cursor)
            for restriction in self.restrictions:
                restriction.write_to_db(cursor)
        except err.IntegrityError as e:
            code = e.args[0]
            if code == 1062:
                print("IGNORING.")
            else:
                conn.rollback()
                print("ERROR CREATING SECTION", e)
        except err.InternalError as e:
            conn.rollback()
            print("ERROR CREATING SECTION", e)
        finally:
            conn.commit()
            cursor.close()
            conn.close()
Example #30
def get_posts(engine: Engine, post_id: int = None) -> list:
    """
    Get all blog posts as a list of table records

    :param engine: SQLAlchemy engine object
    :param post_id: blog entry 'id' (optional)
    :return: list of post records
    """

    with engine.connect() as conn:
        if post_id is not None:
            result = conn.execute('SELECT * FROM entries WHERE id = ?',
                                  post_id)
        else:
            result = conn.execute('SELECT * FROM entries')

        # unpack results into list of JSON records
        posts = [dict(row) for row in result]

        # data correctly retrieved
        if len(posts) > 0:

            # unpack post 'tags'
            for i in range(len(posts)):
                posts[i]['tags'] = posts[i]['tags'].split(',')

        return posts
Example #31
def check_database(engine: Engine, user_name: pwd.struct_passwd,
                   tables: Iterable[Table]):
    logger.info("Checking database access as user %s", user_name)
    try:
        conn = engine.connect()
    except DBAPIError as e:
        logger.critical("Could not connect to database as %s: %s",
                        user_name, e)
        raise
    with contextlib.closing(conn):
        for table in tables:
            try:
                check_table(conn, table)
            except DBAPIError as e:
                logger.critical("Query check for table %s as user %s failed: "
                                "%s", table.name, user_name, e)
                raise
Example #32
def get_revision(
        config: Config,
        engine: Engine,
        script: ScriptDirectory,
        revision_type='current'
) -> str:
    """
    Helper to get revision id
    """
    with engine.connect() as conn:
        with EnvironmentContext(config, script) as env_context:
            env_context.configure(conn, version_table="migrate_version")
            if revision_type == 'head':
                revision = env_context.get_head_revision()
            else:
                migration_context = env_context.get_context()
                revision = migration_context.get_current_revision()
    return revision