def upsert_rows(conn: "psycopg2.extensions.connection", df: pd.DataFrame,
                table: str, pkeys: list) -> None:
    """Bulk upsert the dataframe rows into ``table``.

    Builds a single ``INSERT ... ON CONFLICT (pkeys) DO UPDATE SET ...``
    statement.  Row values are escaped with ``cursor.mogrify`` (the original
    code interpolated ``str(tuple)`` directly, which broke on strings
    containing quotes and on ``None``, and was SQL-injectable through the
    data).  ``table`` and column names cannot be parameterized and are
    trusted input.

    Args:
        conn: open psycopg2 connection.
        df: rows to upsert; column names must match the table's columns.
        table: target table name (trusted, not escaped).
        pkeys: primary-key column names for the ON CONFLICT clause.

    Returns:
        None on success; 1 on failure (legacy error signal kept for
        backward compatibility with existing callers).
    """
    rows = [tuple(row) for row in df.to_numpy()]
    cols = ','.join(df.columns)
    # One "(%s,%s,...)" template per row; mogrify fills it with safely
    # quoted literals.
    row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
    cursor = conn.cursor()
    try:
        values = ','.join(
            cursor.mogrify(row_template, row).decode() for row in rows
        )
        insert_statement = f"INSERT INTO {table}({cols}) VALUES {values}"
        on_conflict_statement = 'ON CONFLICT (' + ', '.join(map(str, pkeys)) + ')'
        do_update_statement = _create_update_set_statement(list(df.columns))
        query = ' '.join(
            [insert_statement, on_conflict_statement, do_update_statement]
        )
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error("Unable to upsert rows into %s: %s", table, error)
        conn.rollback()
        return 1
    finally:
        # Guaranteed close on both success and failure paths.
        cursor.close()
def table_columns(conn: "psycopg2.extensions.connection", table: str) -> list:
    """Return the column names of ``table`` in the ``public`` schema.

    The table name is passed as a bound query parameter (the original code
    interpolated it into the SQL with an f-string, which was SQL-injectable).
    The name is lowercased first because PostgreSQL folds unquoted
    identifiers to lower case in ``information_schema``.

    Args:
        conn: open psycopg2 connection.
        table: table name to inspect (case-insensitive).

    Returns:
        List of column-name strings on success; 1 on failure (legacy error
        signal kept for backward compatibility with existing callers).
    """
    table = table.lower()
    query = '''
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = 'public'
          AND TABLE_NAME = %s
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(query, (table,))
        return [row[0] for row in cursor.fetchall()]
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error("Unable to fetch columns for %s: %s", table, error)
        conn.rollback()
        return 1
    finally:
        cursor.close()
def insert_data(conn: "psycopg2.extensions.connection", df: pd.DataFrame) -> None:
    '''Bulk insert dataframe into the advertisementdata table.

    This function was inspired by Naysan Saran's article "Pandas to
    PostgreSQL using Psycopg2: Bulk Insert Performance Benchmark", in which
    the author compared a variety of bulk insert methods by execution time.
    Saving the dataframe to a StringIO object and then copying this to the
    database proved to be the most efficient when dealing with millions of
    records.

    Source: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/

    Args:
        conn: open psycopg2 connection.
        df: rows to insert; the index is written as the ``id`` column.
    '''
    # Sibling helper; presumably aligns the dataframe index with the
    # table's id sequence -- confirm against its definition.
    set_index(conn, df)
    buffer = StringIO()
    # NOTE(review): sep="," with default CSV quoting means any field
    # containing a comma or quote will break copy_from -- assumes the data
    # never contains such characters; confirm upstream sanitization.
    df.to_csv(buffer, index_label='id', header=False)
    buffer.seek(0)
    cursor = conn.cursor()
    try:
        cursor.copy_from(buffer, 'advertisementdata', sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Error inserting data: {error}")
        conn.rollback()
    finally:
        # Single guaranteed close (the original closed the cursor twice on
        # the error path).
        cursor.close()
def execute_query(conn: "psycopg2.extensions.connection", query: str) -> None:
    """Execute an arbitrary SQL statement and commit it.

    On failure the transaction is rolled back and the error is logged; the
    exception is not re-raised.  The cursor is closed on every path (the
    original closed it twice on the error path).

    Args:
        conn: open psycopg2 connection.
        query: complete SQL statement (trusted input -- it is executed
            verbatim, so never pass user-supplied text).
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Unable to execute query. Error: {error}")
        conn.rollback()
    finally:
        cursor.close()
def drop_rows(conn: "psycopg2.extensions.connection", table: str,
              where_condition: str) -> None:
    '''Delete rows from ``table`` matching ``where_condition``.

    Both ``table`` and ``where_condition`` are interpolated directly into
    the SQL (identifiers and raw clauses cannot be bound parameters), so
    they MUST come from trusted code, never from user input.

    Args:
        conn: open psycopg2 connection.
        table: target table name (trusted, not escaped).
        where_condition: raw SQL predicate, e.g. ``"id < 100"`` (trusted).

    Returns:
        None on success; 1 on failure (legacy error signal kept for
        backward compatibility with existing callers).
    '''
    query = f'''
        DELETE FROM {table}
        WHERE {where_condition}
    '''
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error("Unable to delete rows from %s: %s", table, error)
        conn.rollback()
        return 1
    finally:
        cursor.close()
def insert_rows(conn: "psycopg2.extensions.connection", df: pd.DataFrame,
                table: str) -> None:
    '''Bulk insert the dataframe rows into ``table``.

    Row values are escaped with ``cursor.mogrify`` (the original code
    interpolated ``str(tuple)`` directly, which broke on strings containing
    quotes and on ``None``, and was SQL-injectable through the data).
    ``table`` and column names cannot be parameterized and are trusted
    input.

    Args:
        conn: open psycopg2 connection.
        df: rows to insert; column names must match the table's columns.
        table: target table name (trusted, not escaped).

    Returns:
        None on success; 1 on failure (legacy error signal kept for
        backward compatibility with existing callers).
    '''
    rows = [tuple(row) for row in df.to_numpy()]
    cols = ','.join(df.columns)
    # One "(%s,%s,...)" template per row; mogrify fills it with safely
    # quoted literals.
    row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
    cursor = conn.cursor()
    try:
        values = ','.join(
            cursor.mogrify(row_template, row).decode() for row in rows
        )
        query = f"INSERT INTO {table}({cols}) VALUES {values}"
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error("Unable to insert rows into %s: %s", table, error)
        conn.rollback()
        return 1
    finally:
        cursor.close()