def create_table(
    db_engine: sqlalchemy.engine,
    table_name: str,
    col_datatype_dct: dict,
    primary_key: str = None,
    index_lst: list = None,
    foreignkey_ref_dct: dict = None,
):
    # example primary_key: "id INT AUTO_INCREMENT PRIMARY KEY"
    def_strings = []
    col_definition_str = ", ".join(
        [f"{k} {v}" for k, v in col_datatype_dct.items()])
    if primary_key:
        col_definition_str = primary_key + ", " + col_definition_str
    def_strings.append(col_definition_str)
    if foreignkey_ref_dct:
        foreign_key_strs = [
            f"FOREIGN KEY ({k}) REFERENCES {v}"
            for k, v in foreignkey_ref_dct.items()
        ]
        def_strings.append(", ".join(foreign_key_strs))
    if index_lst:
        index_str = ", ".join([f"INDEX ({index})" for index in index_lst])
        def_strings.append(index_str)
    create_table_query = (
        f"""CREATE TABLE IF NOT EXISTS {table_name} ({','.join(def_strings)});"""
    )
    db_engine.execute(create_table_query)

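# A minimal usage sketch for create_table, assuming a MySQL-compatible engine
# (the AUTO_INCREMENT and INDEX syntax above is MySQL-flavoured). The URL,
# table, and columns below are hypothetical.
def _example_create_table():
    engine = sqlalchemy.create_engine("mysql+pymysql://user:pass@localhost/mydb")
    create_table(
        db_engine=engine,
        table_name="users",
        col_datatype_dct={"email": "VARCHAR(255)", "status": "VARCHAR(32)",
                          "team_id": "INT"},
        primary_key="id INT AUTO_INCREMENT PRIMARY KEY",
        index_lst=["email"],
        foreignkey_ref_dct={"team_id": "teams(id)"},
    )
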
def update_status(cls, db_engine: engine, email: str, status: str,
                  updated_at: datetime):
    query = """
        UPDATE {}.{}
        SET status = '{}', updated_at = '{}'
        WHERE email = '{}'
    """.format(PATT_SCHEMA_NAME, cls.__tablename__, status, updated_at, email)
    db_engine.execute(query)

def add_columns_to_table(db_engine: sqlalchemy.engine, table_name: str,
                         col_datatype_dct: dict):
    add_col_definition_str = ", ".join(
        [f"ADD {k} {v}" for k, v in col_datatype_dct.items()])
    sql_query = f"""
        ALTER TABLE {table_name}
        {add_col_definition_str}
    """
    db_engine.execute(sql_query)

def add(self, secret: str, phrase: str, cipher: Fernet,
        engine: sqlalchemy.engine, delete_date=None) -> str:
    """
    Add a new secret to the database.

    :param secret: the secret's text
    :param phrase: code phrase to unlock the secret
    :param cipher: cryptography.fernet.Fernet
    :param engine: sqlalchemy.Engine to use for connecting to database objects
    :param delete_date: interval after which the unlocked secret will be
        deleted; may be None (the secret is stored indefinitely) or between
        1 second and 6 days 23 hours 59 minutes 59 seconds
    :return: unique secret key of the stored message
    """
    if self.is_empty:
        self.event.set()
        self.is_empty = False
    with engine.begin() as conn:
        query = sql.text(
            "SELECT Secret.generate_secret(:phrase, :secret, :delete_date)"
        )
        result = conn \
            .execute(query,
                     phrase=generate_password_hash(phrase),
                     secret=cipher.encrypt(str.encode(secret)),
                     delete_date=delete_date) \
            .fetchone()[0]
        secret_key = hashlib.sha256(str.encode(str(result))).hexdigest()
        upd = sql.text(
            "UPDATE Secret.Storage SET SecretKey = :skey WHERE StorageId = :id"
        )
        conn.execute(upd, skey=secret_key, id=result)
    return secret_key

def _upsert(table: sqa.Table, engine: sqa.engine, clean_data: pd.DataFrame):
    """
    insert data into a table, replacing any rows with duplicate indices

    postgres - see:
    https://docs.sqlalchemy.org/en/13/dialects/postgresql.html#insert-on-conflict-upsert
    """
    with engine.begin() as con:
        for index, row in clean_data.iterrows():
            # check index uniqueness by attempting insert; if it fails, update
            row = {**row.dropna().to_dict(), clean_data.index.name: index}
            try:
                if engine.dialect.dbapi.__name__ == 'psycopg2':
                    insert = pg_insert(table).values(row).on_conflict_do_update(
                        index_elements=[clean_data.index.name], set_=row)
                else:
                    insert = table.insert().values(row)
                con.execute(insert)
            except sqa.exc.IntegrityError:
                upsert = table.update() \
                    .where(table.c[clean_data.index.name] == index) \
                    .values(row)
                con.execute(upsert)

def insert_death(
    death: list,
    table: sqlalchemy.Table,
    engine: sqlalchemy.engine,
) -> None:
    # death[4] holds the equipment slots; a fifth entry marks a backpack
    try:
        backpack_bool = bool(death[4][4])
    except IndexError:
        backpack_bool = False
    insertion = table.insert().values(
        user=death[0],
        datetime=death[1],
        base_fame=death[2],
        total_fame=death[3],
        weapon=death[4][0],
        secondary=death[4][1],
        armour=death[4][2],
        ring=death[4][3],
        backpack=backpack_bool,
        maxed_stats=int(death[5][0]),
        killed_by=death[6],
    )
    with engine.connect() as connection:
        connection.execute(insertion)
    return None

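# A hedged usage sketch for insert_death: the positional layout of `death` is
# inferred from the indexing above, and every value below is hypothetical.
def _example_insert_death(table: sqlalchemy.Table, engine: sqlalchemy.engine):
    death = [
        "PlayerName",                                 # user
        "2021-01-01 12:00:00",                        # datetime
        1200,                                         # base fame
        3400,                                         # total fame
        ["Bow", "Quiver", "Leather Armour", "Ring"],  # equipment; a 5th entry would mark a backpack
        ["6"],                                        # maxed stats; element 0 must parse with int()
        "Medusa",                                     # killed_by
    ]
    insert_death(death, table, engine)
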
def table_empty(db_engine: sqlalchemy.engine, table_name: str):
    # EXISTS(...) yields 1 when the table has at least one row
    has_rows = db_engine.execute(f"""
        SELECT EXISTS(SELECT 1 FROM {table_name})
    """).scalar()
    return has_rows == 0

def df_to_sql(db_engine: sqlalchemy.engine, df: pd.DataFrame, table_name: str):
    def delete_quotation(value: str):
        return value.replace("'", "").replace('"', "")

    df = df.astype(str)
    df_values = [[delete_quotation(value) for value in values]
                 for values in df.values]
    sql_query_start = f"INSERT INTO {table_name}"
    column_str = ",".join(list(df))
    values_str = ",".join(
        [f"""('{"','".join(values)}')""" for values in df_values])
    values_str = utils.multiple_replace({
        "'nan'": "NULL",
        "'<NA>'": "NULL"
    }, values_str)
    sql_query = f"{sql_query_start} ({column_str}) VALUES {values_str}"
    db_engine.execute(sql_query)

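# A minimal usage sketch for df_to_sql; the engine URL, table, and frame are
# hypothetical. Note that quotes are stripped rather than escaped, so this
# helper suits trusted, pre-cleaned data only.
def _example_df_to_sql():
    engine = sqlalchemy.create_engine("mysql+pymysql://user:pass@localhost/mydb")
    df = pd.DataFrame({"name": ["Ada", "Grace"], "score": [99, 97]})
    df_to_sql(engine, df, "scores")
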
def table_col_names(db_engine: sqlalchemy.engine, schema_name: str,
                    table_name: str):
    with db_engine.connect() as con:
        rows = con.execute(
            f"select column_name from information_schema.columns "
            f"where table_schema = '{schema_name}' and table_name = '{table_name}'"
        )
        column_names = [row[0] for row in rows]
    return column_names

def create_predictions_job_log(connection: sqlalchemy.engine):
    query = '''
        CREATE TABLE IF NOT EXISTS prediction_job_log (
            id SERIAL PRIMARY KEY,
            date TIMESTAMPTZ,
            rows_predicted INT,
            model_version conversion_prediction_model_versions,
            created_at TIMESTAMPTZ,
            updated_at TIMESTAMPTZ,
            UNIQUE(date, model_version)
        );
    '''
    connection.execute(query)
    if not check_indices_existence('prediction_job_log', connection):
        for index_query in [
                'CREATE INDEX date ON prediction_job_log (date);',
        ]:
            connection.execute(index_query)

def upsert_test_results(
    cls,
    db_engine: engine,
    test_id: int,
    test_type: str,
    kpi_name: str,
    first_population_name: str,
    second_population_name: str,
    first_population_avg: float,
    second_population_avg: float,
    pvalue: float,
    updated_at: datetime,
):
    query = """
        INSERT INTO {0}.{1}
            (test_id, test_type, kpi_name, first_population_name,
             second_population_name, first_population_avg,
             second_population_avg, pvalue, updated_at)
        VALUES ({2}, '{3}', '{4}', '{5}', '{6}', {7}, {8}, {9}, '{10}')
        ON CONFLICT (test_id, kpi_name, first_population_name, second_population_name)
        DO UPDATE SET
            test_id = excluded.test_id,
            test_type = excluded.test_type,
            kpi_name = excluded.kpi_name,
            first_population_name = excluded.first_population_name,
            second_population_name = excluded.second_population_name,
            first_population_avg = excluded.first_population_avg,
            second_population_avg = excluded.second_population_avg,
            pvalue = excluded.pvalue,
            updated_at = excluded.updated_at
    """.format(
        PATT_SCHEMA_NAME,
        cls.__tablename__,
        test_id,
        test_type,
        kpi_name,
        first_population_name,
        second_population_name,
        first_population_avg,
        second_population_avg,
        pvalue,
        updated_at,
    )
    db_engine.execute(query)

def retrieve_data_for_query_key(query_string: str, query_arguments: dict,
                                connection: sqlalchemy.engine) -> pd.DataFrame:
    query_string = sqlalchemy.sql.text(query_string)
    query = connection.execute(query_string, **query_arguments)
    data = pd.DataFrame(query.fetchall())
    if data.empty:
        raise ValueError('No data available')
    data.columns = query.keys()
    return data

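# A hedged usage sketch for retrieve_data_for_query_key, querying the
# prediction_job_log table created above; the bind-parameter value is
# hypothetical.
def _example_retrieve_data(connection: sqlalchemy.engine) -> pd.DataFrame:
    return retrieve_data_for_query_key(
        query_string="SELECT date, rows_predicted FROM prediction_job_log "
                     "WHERE date >= :since",
        query_arguments={"since": "2021-01-01"},
        connection=connection,
    )
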
def _upsert(table: sqa.Table, engine: sqa.engine, cleaned_data: pd.DataFrame):
    """
    insert data into a table, replacing any rows with duplicate indices

    When upsert finds duplicate records, it overwrites ALL VALUES that are
    present in the source DataFrame, including NaN.

    postgres - see:
    https://docs.sqlalchemy.org/en/13/dialects/postgresql.html#insert-on-conflict-upsert
    """
    if isinstance(cleaned_data.index, pd.MultiIndex):
        multi = True
        names = cleaned_data.index.names
        index_elements = names
    else:
        multi = False
        names = cleaned_data.index.name
        index_elements = [names]

    cleaned_data = cleaned_data.astype('object')

    def map2none(val):
        """sqlalchemy freaks out about NaNs, so replace them with None"""
        if pd.notna(val):
            return val

    with engine.begin() as con:
        for row in cleaned_data.reset_index(drop=False).itertuples(index=False):
            # check index uniqueness by attempting insert; if it fails, update
            row = {k: map2none(v) for k, v in row._asdict().items()}
            try:
                if engine.dialect.dbapi.__name__ == 'psycopg2':
                    insert = pg_insert(table).values(row).on_conflict_do_update(
                        index_elements=index_elements, set_=row
                    )
                else:
                    insert = table.insert().values(row)
                con.execute(insert)
            except sqa.exc.IntegrityError:
                if multi:
                    upsert = table.update()
                    for n in names:
                        upsert = upsert.where(table.c[n] == row[n])
                    con.execute(upsert.values(row))
                else:
                    upsert = table.update() \
                        .where(table.c[names] == row[names]) \
                        .values(row)
                    con.execute(upsert)

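# A hedged usage sketch for the MultiIndex-aware _upsert; the engine URL,
# table, and frame are hypothetical, and the target table is assumed to exist
# with a unique constraint matching the frame's index name.
def _example_upsert():
    engine = sqa.create_engine("postgresql+psycopg2://user:pass@localhost/mydb")
    meta = sqa.MetaData()
    table = sqa.Table("scores", meta, autoload=True, autoload_with=engine)
    df = pd.DataFrame({"score": [99, 97]},
                      index=pd.Index(["Ada", "Grace"], name="name"))
    _upsert(table, engine, df)
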
def table_exists(db_engine: sqlalchemy.engine, schema_name: str,
                 table_name: str):
    exists_num = db_engine.execute(f"""
        SELECT EXISTS (
            SELECT * FROM INFORMATION_SCHEMA.TABLES
            WHERE TABLE_SCHEMA = '{schema_name}'
            AND TABLE_NAME = '{table_name}'
        )
    """).scalar()
    return exists_num == 1

def _insert(table: sqa.Table, engine: sqa.engine, clean_data: pd.DataFrame,
            auto_index: bool):
    with engine.begin() as con:
        rows = []
        # drop columns that are entirely null
        df = clean_data.dropna(axis=1, how='all')
        if not auto_index:
            for index, row in df.iterrows():
                rows.append({**row.to_dict(), df.index.name: index})
        else:
            for index, row in df.iterrows():
                rows.append(row.to_dict())
        con.execute(table.insert(), rows)

def update_job_listing(db: engine) -> int:
    """
    Update a row in the database with problematic characters escaped.

    :param db: Engine object representing a SQL database.
    :type db: engine

    :returns: int
    """
    result = db.execute(
        text("UPDATE nyc_jobs SET business_title = 'Senior QA Scapegoat 🏆', \
            job_category = 'Information? <>!#%%Technology!%%#^&%* & Telecom' \
            WHERE job_id = 229837;"))
    LOGGER.info(f"Updated {result.rowcount} row: {result}")
    return result.rowcount

def _insert(table: sqa.Table, engine: sqa.engine, cleaned_data: pd.DataFrame,
            auto_index: bool):
    with engine.begin() as con:
        rows = []
        # remove completely null columns; convert to object due to a bug
        # inserting Int64
        df = cleaned_data.dropna(axis=1, how='all').astype('object')
        if not auto_index:
            for row in df.reset_index(drop=False).itertuples(index=False):
                rows.append(row._asdict())
        else:
            for row in df.reset_index(drop=True).itertuples(index=False):
                rows.append(row._asdict())
        con.execute(table.insert(), rows)

def fetch_job_listings(db: engine) -> Optional[List[dict]]:
    """
    Select rows from database and parse as a list of dicts.

    :param db: Engine object representing a SQL database.
    :type db: engine

    :returns: Optional[List[dict]]
    """
    results = db.execute(
        "SELECT job_id, agency, business_title, "
        "salary_range_from, salary_range_to "
        "FROM nyc_jobs ORDER BY RAND();")
    rows = [dict(row) for row in results.fetchall()]
    LOGGER.info(f"Selected {results.rowcount} rows: {rows}")
    return rows

def retrieve_user_defined_type_existence(
        connection: sqlalchemy.engine) -> List:
    query = sqlalchemy.sql.text('''
        SELECT typname FROM pg_type
        WHERE typname IN :user_defined_types;
    ''')
    type_names = connection.execute(
        query,
        user_defined_types=tuple(CUSTOM_USER_DEFINED_TYPES)).fetchall()
    return [type_name[0] for type_name in type_names]

def db_to_csv(self, sql_path: str, engine: sqlalchemy.engine,
              path: str) -> None:
    """Reads in a sql file and writes out a csv file to a given location."""
    with open(SCRIPT_DIR + '/../sql/' + sql_path) as file:
        query = sqlalchemy.text(file.read())
    with engine.connect() as conn:
        resultproxy = conn.execute(query)
        df_dict = [
            {column: value for column, value in rowproxy.items()}
            for rowproxy in resultproxy
        ]
    df = pd.DataFrame(df_dict)
    out_path = (self.base + path + self.date + '/' +
                str(sql_path.split('.')[0]) + '.csv')
    df.to_csv(out_path, index=False)

def insert_offer(
    offer: list,
    table: sqlalchemy.Table,
    engine: sqlalchemy.engine,
) -> None:
    insertion = table.insert().values(
        user=offer[-1],
        selling=','.join(offer[0]),
        buying=','.join(offer[1]),
        quantity=offer[2],
        datetime=offer[3],
    )
    with engine.connect() as connection:
        connection.execute(insertion)
    return None

def create_predictions_table(connection: sqlalchemy.engine):
    existing_user_defined_types = retrieve_user_defined_type_existence(
        connection)

    if 'conversion_prediction_outcomes' not in existing_user_defined_types:
        query = '''
            CREATE TYPE conversion_prediction_outcomes AS ENUM
                ('conversion', 'no_conversion', 'shared_account_login');
        '''
        connection.execute(query)

    if 'conversion_prediction_model_versions' not in existing_user_defined_types:
        query = '''
            CREATE TYPE conversion_prediction_model_versions AS ENUM ('1.0');
        '''
        connection.execute(query)

    query = '''
        CREATE TABLE IF NOT EXISTS conversion_predictions_daily (
            id SERIAL PRIMARY KEY,
            date TIMESTAMPTZ,
            browser_id TEXT,
            user_ids TEXT NULL,
            predicted_outcome conversion_prediction_outcomes,
            conversion_probability FLOAT,
            no_conversion_probability FLOAT,
            shared_account_login_probability FLOAT,
            model_version conversion_prediction_model_versions,
            created_at TIMESTAMPTZ,
            updated_at TIMESTAMPTZ,
            UNIQUE(date, browser_id)
        );
    '''
    connection.execute(query)

    if not check_indices_existence('conversion_predictions_daily', connection):
        for index_query in [
                'CREATE INDEX browser_id ON conversion_predictions_daily (browser_id);',
                'CREATE INDEX user_ids ON conversion_predictions_daily (user_ids);',
                'CREATE INDEX browser_date ON conversion_predictions_daily (date, browser_id);',
                'CREATE INDEX predicted_outcome ON conversion_predictions_daily (predicted_outcome);'
        ]:
            connection.execute(index_query)

def table_index_exists(db_engine: sqlalchemy.engine, schema: str, table: str,
                       index_name: str = None):
    sql_query = f"""
        SELECT COUNT(1) as IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
        WHERE table_schema='{schema}' AND table_name='{table}'
    """
    if index_name:
        sql_query += f" AND index_name='{index_name}'"
    index_exists_num = db_engine.execute(sql_query).scalar()
    return index_exists_num > 0

def _next_target(self, engine: sqlalchemy.engine) -> None:
    """Determine the next time at which a secret is due for deletion."""
    sleep(0.01)
    query = sql.text(
        "SELECT MIN(DeleteDate) - CURRENT_TIMESTAMP FROM Secret.Storage")
    while True:
        wait_time = engine.execute(query).fetchone()[0]
        if wait_time is None:
            break
        self.event.clear()
        # wait out the full interval until the earliest scheduled deletion
        self.event.wait(wait_time.total_seconds())
        self._delete(engine)
    self.is_empty = True
    self.event.clear()
    self.event.wait(10)
    self._next_target(engine)

def to_sql_ignore(df: pandas.DataFrame, engine: sqlalchemy.engine,
                  tablename: str):
    temp_table = tablename + '_temp'
    df.to_sql(con=engine,
              name=temp_table,
              index=False,
              if_exists='replace',
              dtype={
                  'game_date': sqlalchemy.Date,
                  'pitchid': sqlalchemy.types.LargeBinary
              })
    with engine.begin() as cn:
        insert_sql = (f'INSERT INTO {tablename} '
                      f'(SELECT * FROM {temp_table}) ON CONFLICT DO NOTHING')
        cn.execute(insert_sql)
        drop_sql = f'DROP TABLE {temp_table}'
        cn.execute(drop_sql)

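# A hedged usage sketch for to_sql_ignore; the engine URL, table, and frame
# are hypothetical. Staging through a temp table lets Postgres's
# ON CONFLICT DO NOTHING silently skip rows that would violate the target
# table's constraints.
def _example_to_sql_ignore():
    engine = sqlalchemy.create_engine(
        "postgresql+psycopg2://user:pass@localhost/mlb")
    df = pandas.DataFrame({"game_date": ["2021-04-01"],
                           "pitchid": [b"abc123"]})
    to_sql_ignore(df, engine, "pitches")
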
def check_indices_existence(table_name: str,
                            connection: sqlalchemy.engine) -> bool:
    query = sqlalchemy.sql.text('''
        SELECT
            t.relname AS table_name,
            i.relname AS index_name,
            a.attname AS column_name
        FROM
            pg_class t,
            pg_class i,
            pg_index ix,
            pg_attribute a
        WHERE
            t.oid = ix.indrelid
            AND i.oid = ix.indexrelid
            AND a.attrelid = t.oid
            AND a.attnum = ANY(ix.indkey)
            AND t.relkind = 'r'
            AND t.relname = :table_name
    ''')
    indices = connection.execute(query, table_name=table_name).fetchall()
    return len(indices) != 0

def truncate_table(db_engine: sqlalchemy.engine, table: str):
    db_engine.execute(f"TRUNCATE TABLE {table}")

def get_latest_date_in_table(db_engine: sqlalchemy.engine, table_name: str):
    latest_date = db_engine.execute(
        f"SELECT MAX(date) FROM output.{table_name}").scalar()
    if not latest_date:
        raise IndexError(f"No data in column 'date' of table {table_name}")
    return latest_date

def delete_date_entries_in_table(db_engine: sqlalchemy.engine, min_date: str,
                                 table_name: str):
    db_engine.execute(
        f'DELETE FROM output.{table_name} WHERE date>="{min_date}";')

def delete_table(db_engine: sqlalchemy.engine, table: str):
    db_engine.execute(f"DROP TABLE {table}")