def create_table(
    db_engine: sqlalchemy.engine,
    table_name: str,
    col_datatype_dct: dict,
    primary_key: str = None,
    index_lst: list = None,
    foreignkey_ref_dct: dict = None,
):
    """Create *table_name* if it does not already exist.

    :param db_engine: engine/connection exposing ``execute``
    :param table_name: name of the table to create
    :param col_datatype_dct: mapping of column name -> SQL datatype string
    :param primary_key: optional full primary-key clause, e.g.
        ``"id INT AUTO_INCREMENT PRIMARY KEY"``; prepended to the columns
    :param index_lst: optional list of column names to index inline
    :param foreignkey_ref_dct: optional mapping of column -> referenced
        ``table(column)`` used to build FOREIGN KEY clauses
    """
    definition_parts = []

    columns = ", ".join(f"{col} {dtype}" for col, dtype in col_datatype_dct.items())
    if primary_key:
        columns = primary_key + ", " + columns
    definition_parts.append(columns)

    if foreignkey_ref_dct:
        fk_clauses = (
            f"FOREIGN KEY ({col}) REFERENCES {ref}"
            for col, ref in foreignkey_ref_dct.items()
        )
        definition_parts.append(", ".join(fk_clauses))

    if index_lst:
        definition_parts.append(", ".join(f"INDEX ({col})" for col in index_lst))

    create_table_query = (
        f"""CREATE TABLE IF NOT EXISTS {table_name} ({','.join(definition_parts)});"""
    )
    db_engine.execute(create_table_query)
def update_status(cls, db_engine: engine, email: str, status: str, updated_at: datetime):
    """Update the status and updated_at timestamp of the row matching *email*.

    Fix: values are now passed as bound parameters instead of being
    string-interpolated into the SQL, which was injection-prone and broke
    on values containing quotes. Schema/table names cannot be bound, so
    they are still formatted in (both come from trusted class/module state).

    :param db_engine: engine/connection exposing ``execute``
    :param email: row selector
    :param status: new status value
    :param updated_at: new updated_at timestamp
    """
    query = sqlalchemy.sql.text(
        """
        UPDATE {}.{}
        SET status = :status, updated_at = :updated_at
        WHERE email = :email
        """.format(PATT_SCHEMA_NAME, cls.__tablename__)
    )
    db_engine.execute(query, status=status, updated_at=updated_at, email=email)
def add_columns_to_table(db_engine: sqlalchemy.engine, table_name: str, col_datatype_dct: dict):
    """Append new columns to an existing table.

    :param db_engine: engine/connection exposing ``execute``
    :param table_name: table to alter
    :param col_datatype_dct: mapping of new column name -> SQL datatype string
    """
    add_clauses = [f"ADD {col} {dtype}" for col, dtype in col_datatype_dct.items()]
    sql_query = f"""
        ALTER TABLE {table_name}
        {', '.join(add_clauses)}
    """
    db_engine.execute(sql_query)
def df_to_sql(db_engine: sqlalchemy.engine, df: pd.DataFrame, table_name: str):
    """Bulk-insert all rows of *df* into *table_name* via one INSERT statement.

    Fix: the inner helper's parameter shadowed the builtin ``str``; renamed.

    Every cell is cast to ``str`` and stripped of quote characters because the
    VALUES list is built by hand; pandas' textual missing markers
    (``'nan'`` / ``'<NA>'``) are rewritten to SQL NULL afterwards.

    :param db_engine: engine/connection exposing ``execute``
    :param df: frame whose columns match the target table's columns
    :param table_name: insert target
    """
    def _delete_quotation(value: str) -> str:
        # quotes would terminate the hand-built quoted literals early
        return value.replace("'", "").replace('"', "")

    df = df.astype(str)
    df_values = [[_delete_quotation(value) for value in values]
                 for values in df.values]

    sql_query_start = f"INSERT INTO {table_name}"
    column_str = ",".join(list(df))
    values_str = ",".join(
        [f"""('{"','".join(values)}')""" for values in df_values])
    # astype(str) renders missing values as 'nan' (float NaN) or '<NA>' (pd.NA)
    values_str = utils.multiple_replace({
        "'nan'": "NULL",
        "'<NA>'": "NULL"
    }, values_str)

    sql_query = f"{sql_query_start} ({column_str}) VALUES {values_str}"
    db_engine.execute(sql_query)
def table_empty(db_engine: sqlalchemy.engine, table_name: str) -> bool:
    """Return True when *table_name* contains no rows.

    Bug fix: the original inverted the result — ``EXISTS(SELECT 1 ...)``
    yields 1 when at least one row exists, but the function returned True
    in that case (i.e. it reported a populated table as "empty"). It also
    left the result variable unbound for any unexpected scalar value.

    :param db_engine: engine/connection exposing ``execute``
    :param table_name: table to probe
    :returns: True iff the table has zero rows
    """
    has_rows = db_engine.execute(f"""
        SELECT EXISTS(SELECT 1 FROM {table_name})
    """).scalar()
    # EXISTS returns 1 when a row exists, 0 when the table is empty
    return has_rows == 0
def create_predictions_job_log(connection: sqlalchemy.engine):
    """Create the prediction_job_log table (and its date index) if absent.

    :param connection: engine/connection exposing ``execute``
    """
    ddl = '''
        CREATE TABLE IF NOT EXISTS prediction_job_log (
          id SERIAL PRIMARY KEY,
          date TIMESTAMPTZ,
          rows_predicted INT,
          model_version conversion_prediction_model_versions,
          created_at TIMESTAMPTZ,
          updated_at TIMESTAMPTZ,
          UNIQUE(date, model_version)
        );
    '''
    connection.execute(ddl)

    # only build indices the first time the table materializes
    if not check_indices_existence('prediction_job_log', connection):
        index_statements = (
            'CREATE INDEX date ON prediction_job_log (date);',
        )
        for statement in index_statements:
            connection.execute(statement)
def upsert_test_results(
    cls,
    db_engine: engine,
    test_id: int,
    test_type: str,
    kpi_name: str,
    first_population_name: str,
    second_population_name: str,
    first_population_avg: float,
    second_population_avg: float,
    pvalue: float,
    updated_at: datetime,
):
    """Insert or update one A/B-test result row (Postgres ON CONFLICT upsert).

    Fix: all values are now bound parameters instead of being formatted into
    the SQL string, which was injection-prone and broke on names containing
    quotes. Schema/table names (trusted, non-bindable) are still formatted in.

    :param db_engine: engine/connection exposing ``execute``
    :param test_id: identifier of the statistical test run
    :param test_type: kind of test performed
    :param kpi_name: KPI being compared
    :param first_population_name / second_population_name: group labels
    :param first_population_avg / second_population_avg: group means
    :param pvalue: test p-value
    :param updated_at: timestamp of this upsert
    """
    query = sqlalchemy.sql.text(
        """
        INSERT INTO {0}.{1}
          (test_id, test_type, kpi_name, first_population_name,
           second_population_name, first_population_avg,
           second_population_avg, pvalue, updated_at)
        VALUES (:test_id, :test_type, :kpi_name, :first_population_name,
                :second_population_name, :first_population_avg,
                :second_population_avg, :pvalue, :updated_at)
        ON CONFLICT
          (test_id, kpi_name, first_population_name, second_population_name)
        DO UPDATE SET
          test_id = excluded.test_id,
          test_type = excluded.test_type,
          kpi_name = excluded.kpi_name,
          first_population_name = excluded.first_population_name,
          second_population_name = excluded.second_population_name,
          first_population_avg = excluded.first_population_avg,
          second_population_avg = excluded.second_population_avg,
          pvalue = excluded.pvalue,
          updated_at = excluded.updated_at
        """.format(PATT_SCHEMA_NAME, cls.__tablename__)
    )
    db_engine.execute(
        query,
        test_id=test_id,
        test_type=test_type,
        kpi_name=kpi_name,
        first_population_name=first_population_name,
        second_population_name=second_population_name,
        first_population_avg=first_population_avg,
        second_population_avg=second_population_avg,
        pvalue=pvalue,
        updated_at=updated_at,
    )
def retrieve_data_for_query_key(query_string: str, query_arguments: dict, connection: sqlalchemy.engine) -> pd.DataFrame:
    """Execute a parameterized SQL query and return its rows as a DataFrame.

    Fix: removed a pointless ``f`` prefix on a literal with no placeholders.

    :param query_string: SQL text with :named placeholders
    :param query_arguments: values bound to the placeholders
    :param connection: engine/connection exposing ``execute``
    :returns: DataFrame with columns named after the result set
    :raises ValueError: when the query returns no rows
    """
    query_string = sqlalchemy.sql.text(query_string)
    query = connection.execute(query_string, **query_arguments)
    data = pd.DataFrame(query.fetchall())
    if data.empty:
        raise ValueError('No data available')
    data.columns = query.keys()
    return data
def table_exists(db_engine: sqlalchemy.engine, schema_name: str, table_name: str) -> bool:
    """Return True when *schema_name.table_name* exists per INFORMATION_SCHEMA.

    Fix: the original only assigned the result for scalar values 0 and 1 and
    would raise UnboundLocalError for anything else; returning the comparison
    directly removes that dead-end path.

    :param db_engine: engine/connection exposing ``execute``
    :param schema_name: schema to look in
    :param table_name: table to look for
    """
    exists_num = db_engine.execute(f"""
        SELECT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = '{schema_name}'
        AND TABLE_NAME = '{table_name}')
    """).scalar()
    # EXISTS yields exactly 0 or 1
    return exists_num == 1
def fetch_job_listings(db: engine) -> Optional[List[dict]]:
    """
    Select rows from database and parse as list of dicts.

    :param db: Engine object representing a SQL database.
    :type db: engine

    :returns: Optional[List[dict]]
    """
    # RAND() shuffles the result server-side (MySQL dialect)
    results = db.execute("SELECT job_id, agency, business_title, \
                          salary_range_from, salary_range_to \
                          FROM nyc_jobs ORDER BY RAND();")
    rows = []
    for row in results.fetchall():
        rows.append(dict(row))
    LOGGER.info(f"Selected {results.rowcount} rows: \
                {rows}")
    return rows
def update_job_listing(db: engine) -> Optional[int]:
    """
    Update row in database with problematic characters escaped.

    Fixes: the log message claimed rows were "Selected" although this
    performs an UPDATE, and the return annotation said Optional[List[dict]]
    while the function actually returns the affected row count.

    :param db: Engine object representing a SQL database.
    :type db: engine

    :returns: Optional[int] — number of rows updated
    """
    result = db.execute(
        text("UPDATE nyc_jobs SET business_title = 'Senior QA Scapegoat 🏆', \
              job_category = 'Information? <>!#%%Technology!%%#^&%* & Telecom' \
              WHERE job_id = 229837;"))
    LOGGER.info(f"Updated {result.rowcount} row: \
                {result}")
    return result.rowcount
def retrieve_user_defined_type_existence(
        connection: sqlalchemy.engine) -> List:
    """Return which of CUSTOM_USER_DEFINED_TYPES already exist in pg_type.

    :param connection: engine/connection exposing ``execute``
    :returns: list of type names found in the Postgres catalog
    """
    lookup = sqlalchemy.sql.text('''
        SELECT typname FROM pg_type
        WHERE typname IN :user_defined_types;
    ''')
    rows = connection.execute(
        lookup,
        user_defined_types=tuple(CUSTOM_USER_DEFINED_TYPES)).fetchall()
    return [row[0] for row in rows]
def create_predictions_table(connection: sqlalchemy.engine):
    """Create the enum types, daily-predictions table, and its indices.

    Bug fix: the index on the users column referenced ``user_id`` while the
    table declares the column as ``user_ids`` — the CREATE INDEX would fail
    with "column does not exist".

    :param connection: engine/connection exposing ``execute``
    """
    existing_user_defined_types = retrieve_user_defined_type_existence(
        connection)

    # enum types must exist before the table that uses them
    if 'conversion_prediction_outcomes' not in existing_user_defined_types:
        query = '''
            CREATE TYPE conversion_prediction_outcomes AS ENUM
            ('conversion','no_conversion', 'shared_account_login');
        '''
        connection.execute(query)

    if 'conversion_prediction_model_versions' not in existing_user_defined_types:
        query = '''
            CREATE TYPE conversion_prediction_model_versions AS ENUM ('1.0');
        '''
        connection.execute(query)

    query = '''
        CREATE TABLE IF NOT EXISTS conversion_predictions_daily (
          id SERIAL PRIMARY KEY,
          date TIMESTAMPTZ,
          browser_id TEXT,
          user_ids TEXT NULL,
          predicted_outcome conversion_prediction_outcomes,
          conversion_probability FLOAT,
          no_conversion_probability FLOAT,
          shared_account_login_probability FLOAT,
          model_version conversion_prediction_model_versions,
          created_at TIMESTAMPTZ,
          updated_at TIMESTAMPTZ,
          UNIQUE(date, browser_id)
        );
    '''
    connection.execute(query)

    if not check_indices_existence('conversion_predictions_daily', connection):
        for index_query in [
            'CREATE INDEX browser_id ON conversion_predictions_daily (browser_id);',
            # fixed: column is user_ids, not user_id
            'CREATE INDEX user_ids ON conversion_predictions_daily (user_ids);',
            'CREATE INDEX browser_date ON conversion_predictions_daily (date, browser_id);',
            'CREATE INDEX predicted_outcome ON conversion_predictions_daily (predicted_outcome)'
        ]:
            connection.execute(index_query)
def table_index_exists(db_engine: sqlalchemy.engine, schema: str,
                       table: str, index_name: str = None):
    """Report whether *schema.table* has any index (or a specific one).

    :param db_engine: engine/connection exposing ``execute``
    :param schema: schema the table lives in
    :param table: table whose indices are checked
    :param index_name: when given, check only for this named index
    :returns: True when at least one matching index row exists
    """
    sql_query = f"""
        SELECT COUNT(1) as IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
        WHERE table_schema='{schema}' AND table_name='{table}'
    """
    if index_name:
        # narrow the count to the requested index only
        sql_query += f" AND index_name='{index_name}'"
    matching_indices = db_engine.execute(sql_query).scalar()
    return matching_indices > 0
def _next_target(self, engine: sqlalchemy.engine) -> None:
    """Determine the next secret-deletion deadline and wait until it.

    (Original docstring was in Russian and appears truncated:
    "Determine the next time to delete the secret, as ...")

    Polls Secret.Storage for the interval until the earliest DeleteDate,
    waits on self.event for that long, deletes, and repeats; when the
    table is drained it marks the store empty, sleeps 10s, then recurses.
    """
    sleep(0.01)
    query = sql.text(
        "SELECT MIN(DeleteDate) - CURRENT_TIMESTAMP FROM Secret.Storage")
    while True:
        # interval (presumably a timedelta) until the earliest scheduled delete
        wait_time = engine.execute(query).fetchone()[0]
        if wait_time is None:
            # no rows left to delete
            break
        # NOTE(review): divides microseconds by 100000, not 1_000_000 —
        # looks like an off-by-10x unit conversion; also .microseconds
        # ignores the days/seconds components — TODO confirm intent
        wait_time = wait_time.microseconds / 100000
        self.event.clear()
        # event.wait doubles as an interruptible sleep; set() wakes it early
        self.event.wait(wait_time)
        self._delete(engine)
    self.is_empty = True
    self.event.clear()
    self.event.wait(10)
    # NOTE(review): unbounded self-recursion — each drain cycle adds a stack
    # frame and will eventually hit RecursionError; a loop would be safer
    self._next_target(engine)
def check_indices_existence(table_name: str,
                            connection: sqlalchemy.engine) -> bool:
    """Return True when *table_name* has at least one index (Postgres catalogs).

    Fix: the return annotation was the literal value ``True`` (invalid as a
    type); corrected to ``bool``.

    :param table_name: ordinary table (relkind 'r') to inspect
    :param connection: engine/connection exposing ``execute``
    """
    query = sqlalchemy.sql.text('''
        SELECT
            t.relname AS table_name,
            i.relname AS index_name,
            a.attname AS column_name
        FROM
            pg_class t,
            pg_class i,
            pg_index ix,
            pg_attribute a
        WHERE
            t.oid = ix.indrelid
            AND i.oid = ix.indexrelid
            AND a.attrelid = t.oid
            AND a.attnum = ANY(ix.indkey)
            AND t.relkind = 'r'
            AND t.relname = :table_name
    ''')
    indices = connection.execute(query, table_name=table_name).fetchall()
    return len(indices) != 0
def get_latest_date_in_table(db_engine: sqlalchemy.engine, table_name: str):
    """Return MAX(date) from output.<table_name>.

    :param db_engine: engine/connection exposing ``execute``
    :param table_name: table inside the ``output`` schema
    :raises IndexError: when the table holds no date values
    """
    query = f"SELECT MAX(date) FROM output.{table_name}"
    latest_date = db_engine.execute(query).scalar()
    if latest_date:
        return latest_date
    raise IndexError("No data in variable 'date' in table")
def delete_date_entries_in_table(db_engine: sqlalchemy.engine, min_date: str,
                                 table_name: str):
    """Delete all rows of output.<table_name> with date >= *min_date*.

    Fix: *min_date* is now a bound parameter instead of being interpolated
    inside double quotes — the old form was injection-prone and relied on
    MySQL treating double quotes as string delimiters (ANSI SQL reads them
    as identifiers).

    :param db_engine: engine/connection exposing ``execute``
    :param min_date: inclusive lower bound for dates to delete
    :param table_name: table inside the ``output`` schema
    """
    query = sqlalchemy.sql.text(
        f'DELETE FROM output.{table_name} WHERE date >= :min_date;')
    db_engine.execute(query, min_date=min_date)
def delete_table(db_engine: sqlalchemy.engine, table: str):
    """Drop *table* entirely (errors if it does not exist).

    :param db_engine: engine/connection exposing ``execute``
    :param table: table to drop
    """
    drop_statement = f"DROP TABLE {table}"
    db_engine.execute(drop_statement)
def truncate_table(db_engine: sqlalchemy.engine, table: str):
    """Remove every row from *table* while keeping its definition.

    :param db_engine: engine/connection exposing ``execute``
    :param table: table to truncate
    """
    truncate_statement = f"TRUNCATE TABLE {table}"
    db_engine.execute(truncate_statement)