def add_index(engine: Engine, table_name: str, column: sa.Column):
    """Create an index based on the column index definition calling the compiled SQL statement:

        CREATE INDEX index_name ON table_name (column_name)

    :param engine: the bound sql database engine
    :param table_name: the name of the table with the column
    :param column: the instantiated column definition

    :return: - nothing -
    """
    c_table_name = _compile_name(table_name, dialect=engine.dialect)
    c_column_name = column.compile(dialect=engine.dialect)
    index_name = "ix_%s_%s" % (table_name, column.name)
    c_index_name = _compile_name(index_name, dialect=engine.dialect)
    engine.execute('CREATE INDEX %s ON %s ( %s )'
                   % (c_index_name, c_table_name, c_column_name))
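# A minimal usage sketch for add_index above (not part of the original snippet).
# It assumes the project-internal _compile_name helper referenced by the function
# is importable, and relies on the SQLAlchemy 1.x-style Engine.execute API used
# throughout these snippets. The table and column names are illustrative only.
import sqlalchemy as sa


def _example_add_index() -> None:
    engine = sa.create_engine("sqlite:///:memory:")  # hypothetical engine
    engine.execute("CREATE TABLE users (id INTEGER, email VARCHAR(255))")
    # Creates an index named "ix_users_email" on users(email).
    add_index(engine, "users", sa.Column("email", sa.String(255)))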
def execute(engine: Engine, sql: str) -> None:
    log.debug(sql)
    if _print_not_execute:
        print(format_sql_for_print(sql) + "\n;")  # extra \n in case the SQL ends in a comment
    else:
        engine.execute(sql)
def upsert_admin(cls, db_engine: engine.Engine) -> None:
    db_config = Config()
    query = """
        INSERT INTO {0}.{1} (email, password, status, role, created_at, updated_at)
        VALUES ('{2}', '{3}', '{4}', '{5}', '{6}', '{7}')
        ON CONFLICT (email)
        DO UPDATE SET
            email = excluded.email,
            password = excluded.password,
            status = excluded.status,
            role = excluded.role,
            updated_at = excluded.updated_at
    """.format(
        PATT_SCHEMA_NAME,
        cls.__tablename__,
        db_config.admin_email,
        Bcrypt().generate_password_hash(db_config.admin_password).decode("utf-8"),
        UserStatuses.confirmed,
        Roles.admin,
        datetime.utcnow(),
        datetime.utcnow(),
    )
    db_engine.execute(query)
def delete(cls, db_engine: engine.Engine, email: str):
    query = """
        DELETE FROM {}.{} WHERE email = '{}'
    """.format(PATT_SCHEMA_NAME, cls.__tablename__, email)
    db_engine.execute(query)
def check_or_update_odc_schema(engine: Engine):
    """
    Check that the ODC schema is updated enough to run Explorer,
    and either update it safely (if we have permission), or tell the user how.
    """
    # We need the `update` column on ODC's dataset table in order to run incremental product refreshes.
    try:
        # We can try to install it ourselves if we have permission, using ODC's code.
        if not pg_column_exists(engine, ODC_DATASET.fullname, "updated"):
            _LOG.warn("schema.applying_update.add_odc_change_triggers")
            _utils.install_timestamp_trigger(engine)
    except ProgrammingError as e:
        # We don't have permission.
        raise SchemaNotRefreshable(
            dedent("""
            Missing update triggers.

            No dataset-update triggers are installed on the ODC instance,
            and Explorer does not have enough permissions to add them itself.

            It's recommended to run `datacube system init` on your ODC instance to install them.

            Then try this again.
            """)
        ) from e

    # Add optional indexes to AGDC if we have permission.
    # (otherwise we warn the user that it may be slow, and how to add it themselves)
    statements = []
    try:
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name, "ix_dataset_added"):
            _LOG.warn("schema.applying_update.add_odc_added_index")
            statements.append(
                f"create index ix_dataset_added on {ODC_DATASET.fullname}(added desc);"
            )
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name, "ix_dataset_type_changed"):
            _LOG.warn("schema.applying_update.add_odc_changed_index")
            statements.append(
                f"create index ix_dataset_type_changed on "
                f"{ODC_DATASET.fullname}(dataset_type_ref, greatest(added, updated, archived) desc);"
            )
        while statements:
            engine.execute(statements[-1])
            statements.pop()
    except ProgrammingError:
        unexecuted_sql = "\n ".join(statements)
        warnings.warn(
            dedent(f"""
            No recently-added index.

            Explorer recommends adding an index for recently-added datasets to your ODC,
            but does not have permission to add it to the current ODC database.

            It's recommended to add it manually in Postgres:

            {unexecuted_sql}
            """)
        )
        raise
def drop_all(engine: Engine) -> None:
    from quiz_bot.db.base import metadata

    click.echo('Dropping schema...')
    for table in metadata.tables:
        engine.execute(f'DROP TABLE IF EXISTS "{table}" CASCADE')
    metadata.drop_all()
    click.echo('Schema successfully dropped!')
def update_schema(engine: Engine):
    """Update the schema if needed."""
    if not pg_column_exists(engine, f"{CUBEDASH_SCHEMA}.product", "fixed_metadata"):
        _LOG.info("schema.applying_update.add_fixed_metadata")
        engine.execute(f"""
            alter table {CUBEDASH_SCHEMA}.product add column fixed_metadata jsonb
        """)
def create_index(engine: Engine, directory: str):
    try:
        print("Indexing...")
        engine.execute(
            f"CREATE INDEX {directory}_index ON {directory} (term, year)")
        print(f"Index created for {directory}")
    except NoSuchTableError:
        print(
            f"Index creation failed because table '{directory}' does not exist. "
            f"Check if directory '{directory}' is empty.")
def delete_trigger(name: str, engine: Engine, table: str, schema: str = None):
    """Delete a trigger (if it exists) on the database.

    Args:
        name (str): The trigger name.
        engine (Engine): The SQLAlchemy active database engine.
        table (str): The table name.
        schema (str): The table schema that the trigger is attached to.
    """
    schema = schema or 'public'
    engine.execute(f'DROP TRIGGER IF EXISTS {name} ON {schema}.{table}')
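# A hedged usage sketch for delete_trigger above (not part of the original snippet).
# The connection string, trigger name, and table name are placeholders; the call
# relies on the SQLAlchemy 1.x-style Engine.execute used throughout these snippets.
from sqlalchemy import create_engine


def _example_delete_trigger() -> None:
    engine = create_engine("postgresql://localhost/example")  # hypothetical DSN
    # Issues: DROP TRIGGER IF EXISTS update_collection_time ON public.collections
    delete_trigger("update_collection_time", engine, "collections")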
def change_column_length(table: Table, column: Column, length: int, engine: Engine) -> None:
    """ Change the column length in the supplied table """
    if column.type.length < length:
        print("Changing length of {} from {} to {}".format(
            column, column.type.length, length))
        column.type.length = length
        column_name = column.name
        column_type = column.type.compile(engine.dialect)
        engine.execute(
            'ALTER TABLE {table} ALTER COLUMN {column_name} TYPE {column_type}'
            .format(**locals()))
def _insert_data(engine: Engine, table: Union[Table, Base], buffer: List[Dict[str, Any]]) -> None:
    """
    Inserts all records stored in the buffer into the specified table using the specified engine.
    Does nothing if the buffer is empty.

    :param engine: Database engine.
    :param table: Database table, records are inserted into.
    :param buffer: List of new data to be inserted.
    """
    if len(buffer) > 0:
        if isinstance(table, Table):
            engine.execute(table.insert(), buffer)
        else:
            engine.execute(table.__table__.insert(), buffer)
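# A hedged usage sketch for _insert_data above (not part of the original snippet),
# assuming a Core Table; with a declarative ORM class the function would use
# table.__table__.insert() instead. Relies on the 1.x-style Engine.execute API.
import sqlalchemy as sa


def _example_insert_data() -> None:
    metadata = sa.MetaData()
    users = sa.Table(
        "users", metadata,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String(50)),
    )
    engine = sa.create_engine("sqlite:///:memory:")  # hypothetical engine
    metadata.create_all(engine)
    # One executemany-style insert covering the whole buffer.
    _insert_data(engine, users, [{"id": 1, "name": "Ada"}, {"id": 2, "name": "Grace"}])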
def get_tables(connection: Engine) -> pd.DataFrame:
    dfs = []
    connection.execute(f"USE WAREHOUSE {warehouse};")
    query = ("SELECT TABLE_CATALOG, TABLE_SCHEMA, "
             "concat(TABLE_CATALOG,'_', TABLE_SCHEMA) as SCHEMA_ID, "
             "TABLE_NAME, concat(schema_id,'_',TABLE_NAME) as TABLE_ID, "
             "ROW_COUNT, CREATED, LAST_ALTERED "
             "FROM information_schema.TABLES "
             "WHERE TABLE_SCHEMA NOT IN ('PUBLIC', 'INFORMATION_SCHEMA');")
    for db in DATABASES:
        connection.execute(f"USE DATABASE {db};")
        df = pd.read_sql(query, connection)
        dfs.append(df)
    df = pd.concat(dfs, ignore_index=True)
    return df
def load_charges(engine: Engine, npartitions: int = None, clear_existing: bool = False):
    ddf = read_raw_data(npartitions)
    ddf = clean_data(ddf)
    if clear_existing:
        logger.info("Clearing any existing expungement data")
        for table in [runs, charges, features, outcomes]:
            logger.info(f"Deleting from: {table.name}")
            engine.execute(f"""
                DELETE FROM {table.name}
            """)
    load_to_db(ddf, target_table=charges, engine=engine, include_index=False)
def column_reflection_fallback(
        selectable: Select, dialect: Dialect, sqlalchemy_engine: Engine) -> List[Dict[str, str]]:
    """If we can't reflect the table, use a query to at least get column names."""
    col_info_dict_list: List[Dict[str, str]]
    if dialect.name.lower() == "mssql":
        # Get column names and types from the database
        # Reference: https://dataedo.com/kb/query/sql-server/list-table-columns-in-database
        columns_query: str = f"""
            SELECT
                SCHEMA_NAME(tab.schema_id) AS schema_name,
                tab.name AS table_name,
                col.column_id AS column_id,
                col.name AS column_name,
                t.name AS column_data_type,
                col.max_length AS column_max_length,
                col.precision AS column_precision
            FROM sys.tables AS tab
            INNER JOIN sys.columns AS col
                ON tab.object_id = col.object_id
            LEFT JOIN sys.types AS t
                ON col.user_type_id = t.user_type_id
            WHERE tab.name = '{selectable}'
            ORDER BY schema_name, table_name, column_id
        """
        col_info_query: TextClause = sa.text(columns_query)
        col_info_tuples_list: List[tuple] = sqlalchemy_engine.execute(
            col_info_query).fetchall()
        # type_module = _get_dialect_type_module(dialect=dialect)
        col_info_dict_list: List[Dict[str, str]] = [
            {
                "name": column_name,
                # "type": getattr(type_module, column_data_type.upper())(),
                "type": column_data_type.upper(),
            }
            for schema_name, table_name, column_id, column_name,
            column_data_type, column_max_length, column_precision
            in col_info_tuples_list
        ]
    else:
        query: Select = sa.select([sa.text("*")]).select_from(selectable).limit(1)
        result_object = sqlalchemy_engine.execute(query)
        # noinspection PyProtectedMember
        col_names: List[str] = result_object._metadata.keys
        col_info_dict_list = [{"name": col_name} for col_name in col_names]
    return col_info_dict_list
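# A hedged usage sketch for column_reflection_fallback above (not part of the
# original snippet). It assumes the 1.x-style Engine.execute API and the private
# ResultProxy._metadata attribute used by the non-MSSQL branch; the table and
# engine are illustrative only.
import sqlalchemy as sa


def _example_column_reflection_fallback() -> None:
    engine = sa.create_engine("sqlite:///:memory:")  # hypothetical engine
    engine.execute("CREATE TABLE events (id INTEGER, payload TEXT)")
    # On a non-MSSQL dialect the fallback samples one row just to read column names.
    columns = column_reflection_fallback(
        selectable=sa.table("events"),
        dialect=engine.dialect,
        sqlalchemy_engine=engine,
    )
    print([c["name"] for c in columns])  # e.g. ['id', 'payload']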
def get_users_ids_from_db(cls, db_engine: engine.Engine, test_name: str, population_name: str):
    query = """
        SELECT user_id FROM {}.{}
        WHERE test_name = '{}' AND population_name = '{}'
    """.format(PATT_SCHEMA_NAME, cls.__tablename__, test_name, population_name)
    return db_engine.execute(query)
def _get_redshift_history(self, query: str, engine: Engine) -> Optional[Iterable[Any]]:
    results = engine.execute(query)
    events = []
    for row in results:
        # minor type conversion
        if hasattr(row, "_asdict"):
            event_dict = row._asdict()
        else:
            event_dict = dict(row)

        # strip extra spaces introduced by the _asdict() conversion above
        for k, v in event_dict.items():
            if isinstance(v, str):
                event_dict[k] = v.strip()

        if event_dict.get("starttime", None):
            event_dict["starttime"] = event_dict.get("starttime").__str__()
        if event_dict.get("endtime", None):
            event_dict["endtime"] = event_dict.get("endtime").__str__()

        logger.debug(f"event_dict: {event_dict}")
        events.append(event_dict)

    if events:
        return events

    # SQL results can be empty. If results is empty, the SQL connection closes.
    # In that case, we don't want to proceed with ingestion.
    logging.info("SQL Result is empty")
    return None
def _check_usage_date_ranges(self, engine: Engine) -> Any:
    query = """
        select
            min(query_start_time) as min_time,
            max(query_start_time) as max_time
        from snowflake.account_usage.access_history
    """
    with PerfTimer() as timer:
        try:
            for db_row in engine.execute(query):
                if len(db_row) < 2 or db_row[0] is None or db_row[1] is None:
                    self.warn(
                        logger,
                        "check-usage-data",
                        f"Missing data for access_history {db_row} - Check if using Enterprise edition of Snowflake",
                    )
                    continue
                self.report.min_access_history_time = db_row[0].astimezone(
                    tz=timezone.utc
                )
                self.report.max_access_history_time = db_row[1].astimezone(
                    tz=timezone.utc
                )
                self.report.access_history_range_query_secs = round(
                    timer.elapsed_seconds(), 2
                )
        except Exception as e:
            self.error(logger, "check-usage-data", f"Error was {e}")
def mssql_get_pk_index_name(engine: Engine,
                            tablename: str,
                            schemaname: str = MSSQL_DEFAULT_SCHEMA) -> str:
    """
    For Microsoft SQL Server specifically: fetch the name of the PK index
    for the specified table (in the specified schema), or ``''`` if none is found.
    """
    # http://docs.sqlalchemy.org/en/latest/core/connections.html#sqlalchemy.engine.Connection.execute  # noqa
    # http://docs.sqlalchemy.org/en/latest/core/sqlelement.html#sqlalchemy.sql.expression.text  # noqa
    # http://docs.sqlalchemy.org/en/latest/core/sqlelement.html#sqlalchemy.sql.expression.TextClause.bindparams  # noqa
    # http://docs.sqlalchemy.org/en/latest/core/connections.html#sqlalchemy.engine.ResultProxy  # noqa
    query = text("""
        SELECT
            kc.name AS index_name
        FROM
            sys.key_constraints AS kc
            INNER JOIN sys.tables AS ta ON ta.object_id = kc.parent_object_id
            INNER JOIN sys.schemas AS s ON ta.schema_id = s.schema_id
        WHERE
            kc.[type] = 'PK'
            AND ta.name = :tablename
            AND s.name = :schemaname
    """).bindparams(
        tablename=tablename,
        schemaname=schemaname,
    )
    with contextlib.closing(
            engine.execute(query)) as result:  # type: ResultProxy  # noqa
        row = result.fetchone()
        return row[0] if row else ''
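# A hedged usage sketch for mssql_get_pk_index_name above (not part of the original
# snippet). The connection string and table name are placeholders, and "dbo" is
# passed explicitly rather than relying on the MSSQL_DEFAULT_SCHEMA constant.
from sqlalchemy import create_engine


def _example_mssql_get_pk_index_name() -> None:
    engine = create_engine("mssql+pyodbc://user:password@my_dsn")  # hypothetical DSN
    pk_index = mssql_get_pk_index_name(engine, tablename="patient", schemaname="dbo")
    print(pk_index or "no primary key index found")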
async def test_delete_task_handler(cli: TestClient, db: Engine, token: str) -> None:
    tasks = [Task(title="Уборочка")]
    schema: BaseSchema = TaskSchema()
    serialized_tasks, _ = schema.dump(tasks, many=True)
    db.execute(sa.insert(tables.task).values(serialized_tasks))

    response: ClientResponse = await cli.post(f"/tasks/{tasks[0].id}/delete",
                                              headers={"Authorization": token})

    assert response.status == 200
    db_task = db.execute(
        sa.select(["*"]).where(tables.task.c.id == tasks[0].id)).first()
    assert not db_task
def mssql_table_has_ft_index(engine: Engine,
                             tablename: str,
                             schemaname: str = MSSQL_DEFAULT_SCHEMA) -> bool:
    """
    For Microsoft SQL Server specifically: does the specified table
    (in the specified schema) have at least one full-text index?
    """
    query = text("""
        SELECT
            COUNT(*)
        FROM
            sys.key_constraints AS kc
            INNER JOIN sys.tables AS ta ON ta.object_id = kc.parent_object_id
            INNER JOIN sys.schemas AS s ON ta.schema_id = s.schema_id
            INNER JOIN sys.fulltext_indexes AS fi ON fi.object_id = ta.object_id
        WHERE
            ta.name = :tablename
            AND s.name = :schemaname
    """).bindparams(
        tablename=tablename,
        schemaname=schemaname,
    )
    with contextlib.closing(
            engine.execute(query)) as result:  # type: ResultProxy  # noqa
        row = result.fetchone()
        return row[0] > 0
def _try_fetching_version_info_for_schema(
        engine: Engine, schema: str) -> Optional[Tuple[int, int]]:
    sql = SQL_SELECT_VERSIONS.format(quote=_quote_char(engine))
    query = sa_text(sql)
    result = engine.execute(query, col=schema)
    rows = result.fetchall()  # type: List[Tuple[int, int]]
    return rows[0] if len(rows) == 1 else None
def _populate_missing_dataset_extents(engine: Engine, product: DatasetType):
    query = (
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            [
                "id",
                "dataset_type_ref",
                "center_time",
                "footprint",
                "region_code",
                "size_bytes",
                "creation_time",
            ],
            _select_dataset_extent_query(product),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    )

    _LOG.debug(
        "spatial_insert_query.start",
        product_name=product.name,
        # query_sql=as_sql(query),
    )
    inserted = engine.execute(query).rowcount
    _LOG.debug("spatial_insert_query.end", product_name=product.name, inserted=inserted)
    return inserted
def is_included_server_side(db_engine: Engine, metadata: MetaData,
                            fk_tbfullname, fk_field_names,
                            pk_tbfullname, pk_field_names, sampling=0):
    tb_fk: Table = metadata.tables[fk_tbfullname]
    tb_pk: Table = metadata.tables[pk_tbfullname]
    tb_fk = tb_fk.alias('A')
    tb_pk = tb_pk.alias('B')
    tb_sample_fk = tb_fk.alias('C')
    if sampling > 0:
        tb_sample_fk = tb_fk.tablesample(sampling, name='C', seed=text('{}'.format(SEED)))
    fk_fields = [tb_sample_fk.columns[col] for col in fk_field_names]
    pk_fields = [tb_pk.columns[col] for col in pk_field_names]
    query = select(fk_fields).\
        select_from(
            tb_sample_fk.join(
                tb_pk,
                and_(fk_f == pk_f for fk_f, pk_f in zip(fk_fields, pk_fields)),
                isouter=True)).\
        where(and_(pk_f.is_(None) for pk_f in pk_fields)).limit(1)
    res: ResultProxy = db_engine.execute(query)
    first_res = res.first()
    not_included = first_res is None
    return not_included
def datasets_by_region(
    engine: Engine,
    index: Index,
    product_name: str,
    region_code: str,
    time_range: Range,
    limit: int,
    offset: int = 0,
) -> Generator[Dataset, None, None]:
    product = index.products.get_by_name(product_name)
    query = (
        select(postgres_api._DATASET_SELECT_FIELDS)
        .select_from(
            DATASET_SPATIAL.join(DATASET, DATASET_SPATIAL.c.id == DATASET.c.id))
        .where(DATASET_SPATIAL.c.region_code == bindparam("region_code", region_code))
        .where(DATASET_SPATIAL.c.dataset_type_ref == bindparam("dataset_type_ref", product.id))
    )
    if time_range:
        query = query.where(
            DATASET_SPATIAL.c.center_time > bindparam("from_time", time_range.begin)
        ).where(
            DATASET_SPATIAL.c.center_time < bindparam("to_time", time_range.end)
        )
    query = (
        query.order_by(DATASET_SPATIAL.c.center_time)
        .limit(bindparam("limit", limit))
        .offset(bindparam("offset", offset))
    )
    return (
        index.datasets._make(res, full_info=True)
        for res in engine.execute(query).fetchall()
    )
def get_year_range(engine: Engine, year_col: str, table_name: str):
    query = f'SELECT MIN(DISTINCT({year_col})) AS "min_year", MAX(DISTINCT({year_col})) AS "max_year" ' \
            f'FROM {table_name};'
    result = engine.execute(query)
    years = {}
    for row in result:
        years['min'] = row[0]
        years['max'] = row[1]
    return years
def update_schema(engine: Engine) -> Set[PleaseRefresh]:
    """
    Update the schema if needed.

    Returns what data should be resummarised.
    """
    refresh = set()
    if not pg_column_exists(engine, f"{CUBEDASH_SCHEMA}.product", "fixed_metadata"):
        _LOG.info("schema.applying_update.add_fixed_metadata")
        engine.execute(f"""
            alter table {CUBEDASH_SCHEMA}.product add column fixed_metadata jsonb
        """)
        refresh.add(PleaseRefresh.DATASET_EXTENTS)
    return refresh
def _update_git_db_record(engine: Engine, git_table_schema: str, git_table: str,
                          repository: str, branch: str, commit: str):
    """Update or create a Git version table."""
    metadata = MetaData(engine)
    try:
        git_version_table = Table(git_table, metadata, autoload=True, schema=git_table_schema)
    except NoSuchTableError as error:
        logger.info(
            f'Table {git_table_schema + "." + git_table} not found. Creating the table.'
        )
        git_version_table = _sqla_git_table(metadata, git_table_schema, git_table)
        try:
            metadata.create_all()
        except Exception as error:
            raise Exception(
                'Git version table creation failed. See log for detailed error message.'
            ) from error
    git_version_table_columns = [col.name for col in git_version_table.c]
    if 'Commit_hash' in git_version_table_columns:
        logger.info(f'Re-creating table {git_table_schema + "." + git_table}.')
        try:
            metadata.drop_all()
            new_metadata = MetaData(engine)
            git_version_table = _sqla_git_table(new_metadata, git_table_schema, git_table)
            new_metadata.create_all()
        except Exception as error:
            raise Exception(
                'Failed to re-create Git version table. See log for detailed error message.'
            ) from error
    logger.info(f"Repository: {repository}")
    logger.info(f"Branch: {branch}")
    logger.info(f"Version: {commit}")
    engine.execute(git_version_table.delete())
    update_query = git_version_table.insert().values(Repository=repository,
                                                     Branch=branch,
                                                     Commit=commit)
    engine.execute(update_query)
async def test_list_tasks_handler(cli: TestClient, db: Engine, token: str) -> None:
    tasks = [
        Task(title="Уборочка"),
        Task(title="Проездной"),
        Task(title="Dosug")
    ]
    schema: BaseSchema = TaskSchema()
    serialized_tasks, _ = schema.dump(tasks, many=True)
    db.execute(sa.insert(tables.task).values(serialized_tasks))

    response: ClientResponse = await cli.get(
        f"/tasks?target_date={datetime.utcnow().date()}",
        headers={"Authorization": token})
    response_json = await response.json()

    assert serialized_tasks == response_json
def drop_column(engine: Engine, table_name: str, column: sa.Column):
    """Drop a column by calling the compiled SQL statement:

        ALTER TABLE table_name DROP COLUMN column

    :param engine: the bound sql database engine
    :param table_name: the name of the table with the column
    :param column: the instantiated column definition

    :return: - nothing -
    """
    c_table_name = _compile_name(table_name, dialect=engine.dialect)
    c_column_name = column.compile(dialect=engine.dialect)
    engine.execute('ALTER TABLE %s drop COLUMN %s ' % (c_table_name, c_column_name))
def _gen_access_events_from_history_query(
        self, query: str, engine: Engine) -> Iterable[RedshiftAccessEvent]:
    results: ResultProxy = engine.execute(query)
    for row in results:  # type: RowProxy
        if not self._should_process_row(row):
            continue
        access_event = RedshiftAccessEvent(**dict(row.items()))
        # Replace database name with the alias name if one is provided in the config.
        if self.config.database_alias:
            access_event.database = self.config.database_alias
        yield access_event