Example #1
0
def upsert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str, pkeys: list) -> None:
    """
    Insert every row of ``df`` into ``table``, updating existing rows on
    primary-key conflict (PostgreSQL ``INSERT ... ON CONFLICT ... DO UPDATE``).

    Parameters
    ----------
    conn : psycopg2 connection
        Open connection; committed on success, rolled back on failure.
    df : pd.DataFrame
        Rows to upsert; column names must match the table's columns.
    table : str
        Target table name. NOTE(security): identifiers cannot be bound as
        query parameters, so this is interpolated directly into the SQL --
        it must come from trusted code, never from user input.
    pkeys : list
        Column names forming the conflict target (primary/unique key).

    Returns
    -------
    None on success; 1 on database error (kept for backward compatibility
    with existing callers, despite the ``-> None`` annotation).
    """
    # Nothing to do for an empty frame; avoids emitting invalid "VALUES" SQL.
    if df.empty:
        return None

    rows = [tuple(x) for x in df.to_numpy()]

    # Comma-separated dataframe columns
    cols = ','.join(df.columns)

    cursor = conn.cursor()
    try:
        # Let psycopg2 escape the values via mogrify() instead of str(tuple),
        # which mangles quotes, renders None as 'None', and is SQL-injectable.
        row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
        values = ','.join(
            cursor.mogrify(row_template, row).decode('utf-8') for row in rows
        )

        insert_statement = "INSERT INTO %s(%s) VALUES %s" % (table, cols, values)
        on_conflict_statement = 'ON CONFLICT (' + ', '.join(map(str, pkeys)) + ')'
        do_update_statement = _create_update_set_statement(list(df.columns))

        # Final SQL query to execute
        query = ' '.join([insert_statement, on_conflict_statement, do_update_statement])

        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Close the cursor on both the success and the error path.
        cursor.close()
Example #2
0
def table_columns(conn: psycopg2.connect, table: str) -> tuple:
    """
    Return the column names of ``table`` in the ``public`` schema.

    Parameters
    ----------
    conn : psycopg2 connection
        Open connection; rolled back if the lookup fails.
    table : str
        Table name (lower-cased before the lookup).

    Returns
    -------
    list of column-name strings on success; 1 on database error (kept for
    backward compatibility -- note the original ``-> tuple`` annotation is
    inaccurate either way).
    """
    # Bind the table name as a query parameter instead of f-string
    # interpolation: same result, but not SQL-injectable.
    query = '''
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = 'public'
            AND TABLE_NAME = %s
    '''

    cursor = conn.cursor()
    try:
        cursor.execute(query, (table.lower(),))
        # fetchall() yields 1-tuples; unwrap to a flat list of names.
        cols = [col[0] for col in cursor.fetchall()]
        return cols
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Close the cursor on both the success and the error path.
        cursor.close()
Example #3
0
def insert_data(conn: psycopg2.connect, df: pd.DataFrame) -> None:
    '''
    Bulk insert dataframe into advertisementdata table.

    This function was inspired by Naysan Saran's article "Pandas to PostgreSQL using Psycopg2: Bulk Insert Performance
    Benchmark", in which the author chose a variety of bulk insert methods and compared their execution time. Saving the
    dataframe to a StringIO object and then copying this to the database proved to be the most efficient when dealing
    with millions of records.

    Source: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/

    Commits on success; rolls back and logs on failure. Returns None in
    both cases.
    '''

    set_index(conn, df)

    # Serialise the frame to an in-memory CSV and stream it with COPY.
    # NOTE(review): sep="," with no quoting means values containing commas
    # or newlines would corrupt the load -- assumes the data is free of
    # them; confirm against the advertisementdata contents.
    buffer = StringIO()
    df.to_csv(buffer, index_label='id', header=False)
    buffer.seek(0)
    cursor = conn.cursor()

    try:
        cursor.copy_from(buffer, 'advertisementdata', sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Error inserting data: {error}")
        conn.rollback()
    finally:
        # Single close point: the original closed the cursor twice on the
        # error path (once in except, once after the try block).
        cursor.close()
Example #4
0
def execute_query(conn: psycopg2.connect, query: str) -> None:
    """
    Execute an arbitrary SQL statement and commit it.

    Parameters
    ----------
    conn : psycopg2 connection
        Open connection; committed on success, rolled back on failure.
    query : str
        SQL to run. NOTE(security): executed verbatim -- callers must not
        build it from untrusted input.

    Returns None in both the success and the failure case; failures are
    logged and rolled back.
    """
    cursor = conn.cursor()

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Unable to execute query. Error: {error}")
        conn.rollback()
    finally:
        # Single close point: the original closed the cursor twice on the
        # error path (once in except, once after the try block).
        cursor.close()
Example #5
0
def drop_rows(conn: psycopg2.connect, table: str, where_condition: str) -> None:
    """
    Delete the rows of ``table`` matching ``where_condition``.

    Parameters
    ----------
    conn : psycopg2 connection
        Open connection; committed on success, rolled back on failure.
    table : str
        Target table name.
    where_condition : str
        Raw SQL predicate placed after ``WHERE``. NOTE(security): both
        arguments are interpolated directly into the statement -- they must
        come from trusted code, never from user input.

    Returns
    -------
    None on success; 1 on database error (kept for backward compatibility
    with existing callers, despite the ``-> None`` annotation).
    """
    query = f'''
        DELETE FROM {table}
        WHERE {where_condition}
    '''

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Close the cursor on both the success and the error path.
        cursor.close()
Example #6
0
def insert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str) -> None:
    """
    Bulk-insert every row of ``df`` into ``table``.

    Parameters
    ----------
    conn : psycopg2 connection
        Open connection; committed on success, rolled back on failure.
    df : pd.DataFrame
        Rows to insert; column names must match the table's columns.
    table : str
        Target table name. NOTE(security): identifiers cannot be bound as
        query parameters, so this is interpolated directly into the SQL --
        it must come from trusted code, never from user input.

    Returns
    -------
    None on success; 1 on database error (kept for backward compatibility
    with existing callers, despite the ``-> None`` annotation).
    """
    # Nothing to do for an empty frame; avoids emitting invalid "VALUES" SQL.
    if df.empty:
        return None

    rows = [tuple(x) for x in df.to_numpy()]

    # Comma-separated dataframe columns
    cols = ','.join(df.columns)

    cursor = conn.cursor()
    try:
        # Let psycopg2 escape the values via mogrify() instead of str(tuple),
        # which mangles quotes, renders None as 'None', and is SQL-injectable.
        row_template = '(' + ','.join(['%s'] * len(df.columns)) + ')'
        values = ','.join(
            cursor.mogrify(row_template, row).decode('utf-8') for row in rows
        )

        # Final SQL query to execute
        query = "INSERT INTO %s(%s) VALUES %s" % (table, cols, values)

        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Close the cursor on both the success and the error path.
        cursor.close()