def run(conn: connect) -> None:
    """Insert a row into fetched_records, then read the table back and print it.

    Fixes: psycopg2's ``cursor.execute`` returns None, so the original
    ``a = curs.execute(...); print(a)`` always printed ``None`` — the SELECT
    result must come from ``fetchall()``.
    """
    with conn:
        with conn.cursor() as curs:
            # FIXME(review): incomplete SQL — this INSERT has no column list or
            # VALUES clause and will fail; supply values before relying on it.
            curs.execute("insert into fetched_records")
    with conn:
        with conn.cursor() as curs:
            curs.execute("select * from fetched_records")
            print(curs.fetchall())
def create_table_weeks(conn: psycopg2.connect, overwrite: bool = False) -> None:
    """Create the WEEKS table.

    Args:
        conn: Open psycopg2 connection.
        overwrite: When True, drop any existing WEEKS table first.
    """
    table_name = 'WEEKS'
    # `if overwrite:` replaces the anti-idiom `if overwrite == True:`.
    drop_table_statement = f'DROP TABLE IF EXISTS {table_name};' if overwrite else ''
    create_table_statement = f'''
        {drop_table_statement}
        CREATE TABLE {table_name} (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER SMALLINT
            , MATCHUP_PERIOD SMALLINT
            , REG_SEASON_FLAG SMALLINT
            , CONSTRAINT WEEKS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER)
        );
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Close even when execute() raises (the original leaked the cursor).
        cursor.close()
def create_table_teams(conn: psycopg2.connect, overwrite: bool = False) -> None:
    """Create the TEAMS table.

    Args:
        conn: Open psycopg2 connection.
        overwrite: When True, drop any existing TEAMS table first.
    """
    table_name = 'TEAMS'
    # `if overwrite:` replaces the anti-idiom `if overwrite == True:`.
    drop_table_statement = f'DROP TABLE IF EXISTS {table_name};' if overwrite else ''
    create_table_statement = f'''
        {drop_table_statement}
        CREATE TABLE {table_name} (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , TEAM_ID SMALLINT
            , MANAGER_ID VARCHAR(50)
            , TEAM_NAME VARCHAR(50)
            , MANAGER_NAME VARCHAR(50)
            , ESPN_NAME VARCHAR(50)
            , CONSTRAINT TEAMS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, TEAM_ID)
        );
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Close even when execute() raises (the original leaked the cursor).
        cursor.close()
def insert_data(conn: psycopg2.connect, df: pd.DataFrame) -> None:
    '''
    Bulk insert dataframe into advertisementdata table.

    This function was inspired by Naysan Saran's article "Pandas to PostgreSQL
    using Psycopg2: Bulk Insert Performance Benchmark", in which the author
    chose a variety of bulk insert methods and compared their execution time.
    Saving the dataframe to a StringIO object and then copying this to the
    database proved to be the most efficient when dealing with millions of
    records.

    Source: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/
    '''
    set_index(conn, df)
    buffer = StringIO()
    df.to_csv(buffer, index_label='id', header=False)
    buffer.seek(0)  # rewind so copy_from reads from the start
    cursor = conn.cursor()
    try:
        cursor.copy_from(buffer, 'advertisementdata', sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Error inserting data: {error}")
        conn.rollback()
    finally:
        # The original closed the cursor twice on the error path; a single
        # finally-close covers both success and failure.
        cursor.close()
def upsert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str, pkeys: list) -> None:
    """
    Bulk UPSERT of a dataframe into a table.

    Uses cursor.mogrify() to build the VALUES list with proper escaping —
    the original interpolated raw Python tuple reprs into the SQL string,
    which broke on quotes/None and was an SQL-injection vector. The table
    name and primary-key column names are still interpolated; do not pass
    untrusted identifiers.

    Returns None on success, 1 on failure (legacy sentinel preserved).
    """
    cols = ','.join(list(df.columns))
    on_conflict_statement = 'ON CONFLICT (' + ', '.join(map(str, pkeys)) + ')'
    do_update_statement = _create_update_set_statement(list(df.columns))
    cursor = conn.cursor()
    try:
        # One "(%s,%s,...)" template per row, safely escaped by mogrify.
        row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
        values_sql = ','.join(
            cursor.mogrify(row_template, tuple(row)).decode()
            for row in df.to_numpy()
        )
        insert_statement = "INSERT INTO %s(%s) VALUES %s" % (table, cols, values_sql)
        query = insert_statement + ' ' + on_conflict_statement + ' ' + do_update_statement
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    cursor.close()
def bootstrap(self):
    """
    Initialize our database connection after our fork() has occurred; this is
    due to the nature of the psycopg2 library when used with an asynchronous
    connection.

    http://initd.org/psycopg/docs/usage.html#thread-and-process-safety

    @params None
    @return None
    """
    # `async=True` is a SyntaxError since Python 3.7 (`async` became a
    # keyword); pass it via **kwargs so the old wrapper signature still works.
    sqlCon = Database(database="SpfAudit", **{"async": True})

    # Obtain our database descriptor once we're ready to process.
    self.block(sqlCon)
    sql = sqlCon.cursor()

    # Ensure schema intact before processing.
    sql.execute(self.query['ctable'])
    self.block(sql.connection)

    # Propagate our master connection and cursor objects into state structure.
    self.state.update({
        'sqlCon': sqlCon,
        'sql': sql,
    })
def get_score(conn: psycopg2.connect) -> float:
    """Return the average mood value (1-5) across all entries, rounded to one
    decimal. Returns 0.0 when the table is empty — AVG of zero rows yields
    NULL (None), which the original crashed on in round()."""
    sql = """
    SELECT AVG(value)
    FROM (
        SELECT DISTINCT message_id, mood, date,
        CASE
            WHEN mood = 'Awesome' THEN 5
            WHEN mood = 'Good' THEN 4
            WHEN mood = 'Okay' THEN 3
            WHEN mood = 'Bad' THEN 2
            WHEN mood = 'Terrible' THEN 1
        END AS value
        FROM moodtracker
        ORDER BY date ASC
    ) AS mean
    """
    cur = conn.cursor()
    cur.execute(sql)
    data = cur.fetchall()
    cur.close()
    avg = data[0][0]
    return 0.0 if avg is None else round(avg, 1)
def table_columns(conn: psycopg2.connect, table: str) -> tuple:
    """
    Pulls all column names of a table from the information schema.

    Returns a list of column-name strings on success, or 1 on failure
    (legacy sentinel preserved for existing callers).
    """
    table = table.lower()  # information_schema stores identifiers lower-case
    query = f'''
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = 'public'
        AND TABLE_NAME = '{table}'
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        # fetchall() gives 1-tuples; unwrap to plain strings.
        return [col[0] for col in cursor.fetchall()]
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Single close for both paths (the original duplicated the close).
        cursor.close()
def insert_mood(conn: psycopg2.connect, update: Update, user_data: Dict[str, Union[str, List[str]]]) -> None:
    """Persist one mood entry: one row per selected reason, or a single
    NULL-reason row when the user gave no reasons."""
    sql = """
    INSERT INTO moodtracker (message_id, chat_id, mood, reason, note, date)
    VALUES(%s, %s, %s, %s, %s, %s)
    """
    chat_id = update.message.chat_id
    mood = user_data['mood']
    note = user_data['note']
    reasons = user_data['reasons']
    date = datetime.now()
    # message_id combines chat id and a microsecond timestamp for uniqueness.
    message_id = str(chat_id) + str(date.strftime("%Y%m%d%H%M%S%f"))
    cur = conn.cursor()
    if reasons:
        for reason in reasons:
            cur.execute(sql, (message_id, chat_id, mood, reason, note, date))
    else:
        # No reasons selected: store a single row with reason = NULL.
        cur.execute(sql, (message_id, chat_id, mood, None, note, date))
    conn.commit()
    cur.close()
def record_exists(record: Record, conn: connect):
    """Return True iff fetched_records already contains a row for this record id."""
    with conn, conn.cursor() as curs:
        curs.execute(
            "SELECT record_id_updated_at FROM fetched_records WHERE record_id = %(record_id)s",
            {'record_id': record.meta.record_id},
        )
        row = curs.fetchone()
    # De Morgan of the original `not (row is None or len(row) == 0)`.
    return row is not None and len(row) > 0
def create_table_scores(conn: psycopg2.connect, overwrite: bool = False) -> None:
    """Create the SCORES (weekly scores) table.

    Args:
        conn: Open psycopg2 connection.
        overwrite: When True, drop any existing SCORES table first.
    """
    table_name = 'SCORES'
    # `if overwrite:` replaces the anti-idiom `if overwrite == True:`.
    drop_table_statement = f'DROP TABLE IF EXISTS {table_name};' if overwrite else ''
    create_table_statement = f'''
        {drop_table_statement}
        CREATE TABLE {table_name} (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER BIGINT
            , TEAM_ID SMALLINT
            , TEAM_ID_OPP SMALLINT
            , SCORE NUMERIC(5, 2)
            , SCORE_OPP NUMERIC(5, 2)
            , WLT_POINTS NUMERIC(2, 1)
            , WIN_IND SMALLINT
            , LOSS_IND SMALLINT
            , TIE_IND SMALLINT
            , ALL_PLAY_WLT_POINTS NUMERIC(3, 1)
            , ALL_PLAY_WINS SMALLINT
            , ALL_PLAY_LOSSES SMALLINT
            , ALL_PLAY_TIES SMALLINT
            , CUM_SCORE NUMERIC(6, 2)
            , CUM_SCORE_OPP NUMERIC(6, 2)
            , CUM_WLT_POINTS NUMERIC(3, 1)
            , CUM_WINS SMALLINT
            , CUM_LOSSES SMALLINT
            , CUM_TIES SMALLINT
            , CUM_ALL_PLAY_WLT_POINTS NUMERIC(4, 1)
            , CUM_ALL_PLAY_WINS SMALLINT
            , CUM_ALL_PLAY_LOSSES SMALLINT
            , CUM_ALL_PLAY_TIES SMALLINT
            , CUM_SCORE_PER_WEEK NUMERIC(5, 2)
            , CUM_SCORE_OPP_PER_WEEK NUMERIC(5, 2)
            , CUM_ALL_PLAY_WLT_POINTS_PER_WEEK NUMERIC(3, 1)
            , RECORD VARCHAR(10)
            , ALL_PLAY_RECORD VARCHAR(10)
            , STANDINGS SMALLINT
            , HOME_OR_AWAY VARCHAR(10)
            , CONSTRAINT WEEKLY_SCORES_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER, TEAM_ID)
        );
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Close even when execute() raises (the original leaked the cursor).
        cursor.close()
def execute_sql(cnx: connect, sql: str, values: tuple) -> list:
    """Execute one or more semicolon-separated SQL statements (or a .sql file).

    Args:
        cnx: Open database connection (closed before returning).
        sql: SQL text, or a path containing '.sql' whose contents are run.
        values: Parameters bound into each statement.

    Returns:
        A list of dicts, one per executed statement: SELECTs carry column
        names and fetched rows; DELETEs and other statements carry a timing
        message.
    """
    start = time.time()
    cursor = cnx.cursor()
    if '.sql' in sql:
        # `with` closes the file even if read() raises (the original leaked it).
        with open(sql, 'r') as file:
            sql = file.read()
    result = []
    sql_command = sql.split(';')
    try:
        for sql in sql_command:
            # NOTE(review): this skips any statement containing '--' anywhere,
            # not just pure comment lines — fragile, but preserved as-is.
            if sql == '' or '--' in sql:
                continue
            try:
                cursor.execute(sql, values)
                if 'SELECT' in sql:
                    column_names = tuple(desc[0] for desc in cursor.description)
                    result.append({
                        "query": sql,
                        'column_names': column_names,
                        "data": cursor.fetchall()
                    })
                elif 'DELETE' in sql:
                    stop = time.time() - start
                    result.append({
                        "query": sql,
                        "data": 'query executed in {:06.3f}s. {} rows affected'.format(
                            stop, cursor.rowcount)
                    })
                else:
                    stop = time.time() - start
                    result.append({
                        "query": sql,
                        "data": 'query executed in {:06.3f}s'.format(stop)
                    })
            except ProgrammingError as error:
                # Record the failure but keep running the remaining statements.
                print(error)
                print("query unsuccessful: {}".format(sql))
                result.append({
                    "query": sql,
                    "data": 'query unsuccessful: {}'.format(error)
                })
    finally:
        # NOTE(review): the whole connection is closed here, not just the
        # cursor — callers cannot reuse cnx afterwards.
        cursor.close()
        cnx.close()
    return result
def upload_comment(self, comment_data: Comment, conn: psycopg2.connect):
    """Insert a single chat comment row into the comment table and commit."""
    insert_sql = (
        "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,"
        "timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);"
    )
    row = (
        comment_data.message,
        comment_data.author_name,
        comment_data.thumbnails,
        comment_data.timestamp_msec,
        comment_data.timestamp_text,
        comment_data.purchase_amount,
        comment_data.movie_id,
    )
    cur = conn.cursor()
    cur.execute(insert_sql, row)
    conn.commit()
    cur.close()
def fetch_local_s3_url(record: Record, query: Search, conn: connect):
    """Look up the cached S3 location for a record.

    Returns (True, (s3_bucket, s3_location)) on a hit, or the one-element
    tuple (False,) on a miss. NOTE(review): the inconsistent tuple arity is
    preserved from the original — callers appear to unpack conditionally.
    """
    with conn, conn.cursor() as curs:
        curs.execute(
            "SELECT s3_bucket, s3_location FROM fetched_records where record_id = %(record_id)s limit 1",
            {'record_id': record.meta.record_id},
        )
        row = curs.fetchone()
    if row is None:
        return False,
    return True, row
def execute_query(conn: psycopg2.connect, query: str) -> None:
    """Execute a single SQL statement and commit, rolling back on failure.

    Args:
        conn: Open psycopg2 connection.
        query: SQL text to execute.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Unable to execute query. Error: {error}")
        conn.rollback()
    finally:
        # The original closed the cursor twice on the error path; a single
        # finally-close covers both success and failure.
        cursor.close()
def upload_comments(self, comment_datas: List[Comment], conn: psycopg2.connect):
    """Batch-insert chat comment rows with a single executemany call, then commit."""
    insert_sql = (
        "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,"
        "timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);"
    )
    rows = [
        (c.message, c.author_name, c.thumbnails, c.timestamp_msec,
         c.timestamp_text, c.purchase_amount, c.movie_id)
        for c in comment_datas
    ]
    cur = conn.cursor()
    cur.executemany(insert_sql, rows)
    conn.commit()
    cur.close()
def access_control_db(con: psycopg2.connect) -> psycopg2.connect:
    # This function will create two users; an admin for us to populate the db, and another
    # user which can only query stuff (this will be what the front end users authenticate as).
    # NOTE(review): as written, the function only checks whether the 'music_man'
    # role exists (prints the count from pg_roles) — the user creation described
    # above is not implemented here, and despite the return annotation nothing
    # is returned (implicit None). Confirm intent before relying on it.
    # Role/DDL statements cannot run inside a transaction block, hence autocommit.
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()
    try:
        # sql.Identifier safely quotes the catalog table and column names.
        query = sql.SQL(
            "select COUNT(*) from {table} where {pkey} = %s").format(
                table=sql.Identifier('pg_roles'),
                pkey=sql.Identifier('rolname'))
        cur.execute(query, ('music_man', ))
        # Prints 1 if the role exists, 0 otherwise.
        print(cur.fetchone()[0])
    except psycopg2.DatabaseError as e:
        print('Error in DB access control: {}'.format(e))
def create_table(conn: psycopg2.connect) -> None:
    """Create the moodtracker table if it does not already exist."""
    ddl = """
    CREATE TABLE IF NOT EXISTS moodtracker (
        message_id varchar(40) NOT NULL,
        chat_id varchar(10) NOT NULL,
        mood varchar(10),
        reason varchar(25),
        note varchar(255),
        date timestamp
    )
    """
    cursor = conn.cursor()
    cursor.execute(ddl)
    cursor.close()
    conn.commit()
def get_most_recent_note(conn: psycopg2.connect) -> str:
    """Return the note of the most recent mood entry.

    Returns '' when the table is empty — the original indexed data[0][0]
    unconditionally and raised IndexError on an empty result set.
    """
    sql = """
    SELECT note
    FROM moodtracker
    ORDER BY date DESC
    LIMIT 1
    """
    cur = conn.cursor()
    cur.execute(sql)
    row = cur.fetchone()  # LIMIT 1 → at most one row
    cur.close()
    return row[0] if row else ''
def get_users_of_id_vk(
    connection: pg.connect,
    table_name: str,
    ids_vk: (list, tuple, set),
    bd: bool = False,
) -> list:
    """Fetch rows whose id (bd=True) or id_vk (bd=False) is in ids_vk.

    Security fix: the ids are now bound as query parameters instead of being
    interpolated into the SQL string. Identifiers (table/column) cannot be
    bound, so table_name is still interpolated — never pass untrusted names.

    Returns the matching rows, or None when there are no matches or ids_vk is
    empty (implicit-None behavior of the original preserved; the original
    produced invalid `IN ()` SQL for an empty collection).
    """
    if not ids_vk:
        return None
    id_str = "" if bd else "_vk"
    placeholders = ", ".join(["%s"] * len(ids_vk))
    with connection:
        with connection.cursor() as cursor:
            cursor.execute(
                f"select * from {table_name} where id{id_str} in ({placeholders})",
                tuple(ids_vk),
            )
            result_user_vk = cursor.fetchall()
    if result_user_vk:
        return result_user_vk
class Database(object):
    """Thin wrapper around a PostgreSQL connection.

    `connected` reports whether a probe query (SELECT VERSION()) returned a
    row at construction time.
    """

    def __init__(self):
        self.conn = PGConnect(
            user=config.DB_USER,
            password=config.DB_PASS,
            host=config.DB_HOST,
            port=config.DB_PORT,
            database=config.DB_NAME
        )
        cursor = self.conn.cursor()
        try:
            cursor.execute("SELECT VERSION()")
            # fetchone() is None only if the probe produced no row.
            self.connected = cursor.fetchone() is not None
        finally:
            # The original never closed this cursor.
            cursor.close()
def get_top_negative_reasons(conn: psycopg2.connect) -> List[List[str]]:
    """Return the five most frequent reasons attached to 'Bad' or 'Terrible'
    moods, as (reason, count) rows ordered by count descending."""
    sql = """
    SELECT reason, COUNT(reason)
    FROM moodtracker
    WHERE mood IN ('Bad', 'Terrible')
    GROUP BY reason
    ORDER BY COUNT(reason) DESC
    LIMIT 5
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()
    cursor.close()
    return rows
def drop_rows(conn: psycopg2.connect, table: str, where_condition: str) -> None:
    '''
    Drops rows from a table based on a set of conditions.

    Returns None on success, 1 on failure (legacy sentinel preserved).
    WARNING: table and where_condition are interpolated directly into the
    SQL string — never pass untrusted input.
    '''
    query = f'''
        DELETE FROM {table}
        WHERE {where_condition}
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Single close for both paths (the original duplicated the close).
        cursor.close()
def run_sql_etl(sql: str, conn: pg.connect, table_name: str):
    """Runs given SQL query on the provided PostgreSQL connection obj.

    Args:
        sql (str): SQL script to run
        conn (psycopg2.connect): PostgreSQL database connection
        table_name (str): Table name for logging purposes
    """
    started_at = time()
    log.debug('Running SQL ETL for "%s" table', table_name)
    with conn.cursor() as cursor:
        cursor.execute(sql)
        conn.commit()
    elapsed = round(time() - started_at, 3)
    log.info(
        'SQL ETL for table "%s" completed in: %s seconds',
        table_name,
        elapsed,
    )
def create_table(connection: pg.connect, table_name, *columns):
    """Create a table with the given column definitions, if it doesn't exist.

    (Original comment translated from Russian: "creates tables".)

    Args:
        connection: Open psycopg2 connection.
        table_name: Name of the table to create (must be a str).
        *columns: Column definition strings; non-string entries are ignored.

    Returns:
        True on success, False on a database error, None when table_name is
        not a string or no valid column strings were given (implicit-None
        behavior of the original preserved).
    """
    if not isinstance(table_name, str):
        return None
    # Keep only string column definitions, matching the original filter;
    # join replaces the original string-accumulation loop + strip.
    column_sql = ", ".join(col for col in columns if isinstance(col, str))
    if not column_sql:
        return None
    with connection.cursor() as cursor:
        try:
            cursor.execute(
                f"CREATE TABLE IF NOT EXISTS {table_name} ({column_sql})"
            )
            return True
        except pg.DatabaseError as e:
            # Runtime message kept verbatim; translation: "Could not create
            # the table because: {e}".
            print(f"Не удалось создать таблицу по причине: {e}")
            return False
def dq_check_fact_table_row_count(
    stage_table: str,
    fact_table: str,
    conn: pg.connect,
) -> bool:
    """Compares row count between the stage and fact tables

    Args:
        stage_table (str): Target stage table name
        fact_table (str): Target fact table name
        conn (pg.connect): Psycopg2 connect obj

    Returns:
        bool: True, if checks have passed. False, if failed.
    """
    func_start_time = time()
    log.debug(
        'Running row consistency checks on "%s" and "%s" tables',
        stage_table,
        fact_table,
    )
    with conn.cursor() as cur:
        cur.execute(TABLE_ROW_COUNT, {'table': AsIs(stage_table)})
        stage_row_count = cur.fetchone()[0]
        cur.execute(TABLE_ROW_COUNT, {'table': AsIs(fact_table)})
        fact_row_count = cur.fetchone()[0]
    passed = stage_row_count == fact_row_count
    if not passed:
        log.warning('Table consistency checks have failed')
        log.warning('Table %s row count is %s', stage_table, stage_row_count)
        log.warning('Table %s row count is %s', fact_table, fact_row_count)
    else:
        log.debug('Table %s row count is %s', stage_table, stage_row_count)
        log.debug('Table %s row count is %s', fact_table, fact_row_count)
    log.info(
        'Consistency checks on "%s" and "%s" tables finished in %s seconds',
        stage_table,
        fact_table,
        round(time() - func_start_time, 3),
    )
    # Fix: the function was annotated and documented as returning bool but
    # previously returned None on every path.
    return passed
def bulk_load_df(data: pd.DataFrame, table_name: str, conn: pg.connect):
    """Bulk inserts a pandas dataframe into PostgreSQL table

    Args:
        data (pandas.Dataframe): Data for insertion
        table_name (str): Table name for logging purposes
        conn (psycopg2.connect): PostgreSQL database connection
    """
    # Seed the buffer directly with the CSV text; the stream position starts
    # at 0, so no explicit seek is required.
    csv_buffer = StringIO(data.to_csv(index=None, header=None, na_rep=''))
    with conn.cursor() as cursor:
        cursor.copy_from(
            csv_buffer,
            table_name,
            columns=data.columns,
            sep=',',
            null='',
        )
        conn.commit()
def insert_record(record: Record, query: Search, conn: connect, s3_bucket: str, s3_location: str):
    """Persist a fetched record's metadata (JSON-serialized) plus its S3 file
    location as one row in fetched_records."""
    # this needs to use e3db fetched records
    with conn:
        with conn.cursor() as curs:
            # Adjacent string literals concatenate into one INSERT statement;
            # all values are bound via named psycopg2 placeholders (no
            # injection risk from the record contents).
            curs.execute(
                ("INSERT INTO fetched_records"
                 " (query_string, record_meta, file_data, record_id, record_id_updated_at, file_name,"
                 " s3_bucket, s3_location)"
                 "VALUES "
                 " (%(query_string)s, %(record_meta)s, %(file_data)s, %(record_id)s,"
                 " %(record_id_updated_at)s, %(file_name)s, %(s3_bucket)s, %(s3_location)s)"
                ), {
                    'query_string': dumps(query.to_json()),
                    'record_meta': dumps(record.meta.plain),
                    'file_data': dumps(record.meta.file_meta.to_json()),
                    'record_id': record.meta.record_id,
                    'record_id_updated_at': record.meta.last_modified,
                    # NOTE(review): reaches into the private _file_name
                    # attribute — presumably file_meta exposes no public
                    # accessor; confirm against the e3db client library.
                    'file_name': record.meta.file_meta._file_name,
                    's3_bucket': s3_bucket,
                    's3_location': s3_location
                })
def get_moods(conn: psycopg2.connect) -> List[List[str]]:
    """Return every distinct mood entry in chronological order, each row
    carrying (message_id, mood, date, numeric value 1-5)."""
    sql = """
    SELECT DISTINCT message_id, mood, date,
    CASE
        WHEN mood = 'Awesome' THEN 5
        WHEN mood = 'Good' THEN 4
        WHEN mood = 'Okay' THEN 3
        WHEN mood = 'Bad' THEN 2
        WHEN mood = 'Terrible' THEN 1
    END AS value
    FROM moodtracker
    ORDER BY date ASC
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()
    cursor.close()
    return rows
def insert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str) -> None:
    '''
    Inserts the df values into the DB table.

    Values are escaped with cursor.mogrify — the original interpolated raw
    Python tuple reprs into the SQL, which broke on quotes/None and was an
    SQL-injection vector. The table name is still interpolated; never pass
    an untrusted table name.

    Returns None on success, 1 on failure (legacy sentinel preserved).
    '''
    cols = ','.join(list(df.columns))
    cursor = conn.cursor()
    try:
        # One "(%s,%s,...)" template per row, safely escaped by mogrify.
        row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
        values_sql = ','.join(
            cursor.mogrify(row_template, tuple(row)).decode()
            for row in df.to_numpy()
        )
        query = "INSERT INTO %s(%s) VALUES %s" % (table, cols, values_sql)
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        cursor.close()
def insert_frags_to_postgresql(connection: pg_connect, match_id: str,
                               frags: List[Tuple[datetime, Any]]) -> None:
    """Insert match frags into PostgreSQL.

    (Docstring fix: the original said "SQLite" / "sqlite3 Connection" while
    the code uses psycopg2 placeholders against PostgreSQL.)

    This function inserts new records into the table match_frag. A 2-tuple
    frag is (frag_time, killer_name); a 4-tuple adds victim_name and
    weapon_code.

    Args:
        connection: An open psycopg2 connection.
        match_id: The identifier of a match.
        frags: A list of frags.

    NOTE(review): no commit() is issued here — the caller owns the
    transaction boundary.
    """
    curs = connection.cursor()
    try:
        for frag in frags:
            if len(frag) == 2:
                curs.execute(
                    """INSERT INTO match_frag (match_id, frag_time, killer_name)
                       VALUES (%s, %s, %s)""",
                    (match_id, *frag))
            elif len(frag) == 4:  # the two arities are mutually exclusive
                curs.execute(
                    """INSERT INTO match_frag
                       (match_id, frag_time, killer_name, victim_name, weapon_code)
                       VALUES (%s, %s, %s, %s, %s)""",
                    (match_id, *frag))
    finally:
        # The original never closed this cursor; release it even on error.
        curs.close()