Ejemplo n.º 1
0
def drop_duplicates_from_db(db_table=None, groupby_cols=None, keep_last=True, *args, **kwargs):
    """
    :Description:
    Drop duplicate records from specified database table

    :Params:
    db_table: Name of the database table
        type: str
        default: None
    groupby_cols: list of columns to check for duplicates
        type: list
        default: None
    keep_last: keep the last duplicated entry?
        type: bool
        default: True
    returns: Nothing

    :Dependencies:
    Python3
    psycopg2

    :Example:
    drop_duplicates_from_db(db_table='Waze', groupby_cols=['ID', 'Date_Time'], keep_last=True)
    """

    import psycopg2

    params = get_params(*args, **kwargs)

    # Make sure the type is list
    if type(groupby_cols) == list:
        cols = '", "'.join(groupby_cols)
    else:
        cols = '", "'.join([groupby_cols])

    if keep_last:
        keep = 'MAX'
    else:
        keep = 'MIN'

    # Build command
    command = '''DELETE FROM ''' + '{0}."{1}" WHERE ctid NOT IN (SELECT {2}(ctid) FROM {0}."{1}" ' \
                                   'GROUP BY "{3}");'.format(params['schema'], db_table, keep, cols)

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(params['host'],
                                                                                     params['database'],
                                                                                     params['port'],
                                                                                     params['username'],
                                                                                     params['password'])

    # Connect to database
    conn = psycopg2.connect(conn_str)
    cursor = conn.cursor()

    # Execute command
    cursor.execute(command)
    conn.commit()
Ejemplo n.º 2
0
def create_new_column(db_table, col_name, col_type, *args, **kwargs):
    """
    :Description:
    Add a column to a database table

    :Params:
    db_table: Database table name
        type: str
    col_name: Name of the new column
        type: str
    col_type: Datatype of the new column
        type: str

    :Returns:
    Nothing, creates new column in database table if it doesn't exist

    :Dependencies:
    Python3
    psycopg2

    :Example:
    create_new_column('some_table', 'new_col', 'integer')
    """

    import psycopg2

    params = get_params(*args, **kwargs)

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(
        params['host'], params['database'], params['port'], params['username'],
        params['password'])

    base_command = '''SELECT * FROM ''' + params['schema']

    # Create the database query
    command = base_command + '."{0}" ADD COLUMN IF NOT EXISTS "{1}" {2}'.format(
        db_table, col_name, col_type)

    # Connect to the database
    conn = psycopg2.connect(conn_str)
    cursor = conn.cursor()

    # Execute the command
    cursor.execute(command)

    conn.commit()
Ejemplo n.º 3
0
def update_db_cell(db_table=None,
                   new_value=None,
                   new_column=None,
                   column_values=None,
                   update_all=False,
                   indent='',
                   *args,
                   **kwargs):
    """
    :Description:
    Pushes new status to the internal status dashboard database table
    Removes old status from the table

    :Params:
    db_table: Name of the database table
        type: str
        default: None
    new_value: New value for the cell
        type: str
        default: None
    new_column: Column of the new value
        type: str
        default: None
    column_values: Column names and values to match
        type: dict
        format: {column_name_0: value_0, column_name_1: value_1, ...}
        default: None
    update_all: Update every cell matching column_values?
        type: bool
        default: False
    text:
        type: bool
        default: False
    returns: Nothing, updates the specified cell of a database table

    :Dependencies:
    Python3
    pandas
    psycopg2

    :Notes:
    If new_value is set to None, the row will be deleted

    :Example:
    update_internal_status_db('automation/waze/some_script', 'VM1', True, 'Something went wrong...script failed')
    """

    from psycopg2 import connect

    params = get_params(*args, **kwargs)

    wheres = []

    for i in column_values:
        wheres.append("\"{0}\" = '{1}'".format(i, column_values[i]))

    wheres = ' AND '.join(wheres)

    command = '''SELECT COUNT(*) FROM ''' + '{0}."{1}" WHERE {2}'.format(
        params['schema'], db_table, wheres)

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(
        params['host'], params['database'], params['port'], params['username'],
        params['password'])

    # Connect to database
    conn = connect(conn_str)
    cursor = conn.cursor()

    cursor.execute(command)
    conn.commit()

    records = cursor.fetchall()
    num = records[0][0]

    if num == 1:

        if new_value is not None:
            command = '''UPDATE ''' + '{0}."{1}" SET "{2}" = \'{3}\' WHERE {4}'.format(
                params['schema'], db_table, new_column, new_value, wheres)
            msg = 'Value updated'
        else:
            command = '''DELETE FROM ''' + '{0}."{1}" WHERE {2}'.format(
                params['schema'], db_table, wheres)
            msg = 'Row deleted'

        # Execute command
        cursor.execute(command)
        conn.commit()

        print(indent + msg)

    elif num == 0:

        print(indent + 'Location not found')

    elif (num > 1) & update_all:

        if new_value is not None:
            command = '''UPDATE ''' + '{0}."{1}" SET "{2}" = \'{3}\' WHERE {4}'.format(
                params['schema'], db_table, new_column, new_value, wheres)
            msg = 'All values updated'
        else:
            command = '''DELETE FROM ''' + '{0}."{1}" WHERE {2}'.format(
                params['schema'], db_table, wheres)
            msg = 'All rows deleted'

        # Execute command
        cursor.execute(command)
        conn.commit()

        print(indent + msg)

    else:
        print(
            indent +
            'Multiple locations found. Be more specific or set "update_all=True" to update value in all locations.'
        )

    conn.close()
Ejemplo n.º 4
0
def to_db_new(df, db_table, d_types=None, index=False, chunk_size=None, *args, **kwargs):
    """
    :Description:
    Pushes data to the selected database table

    :Params:
    df: Data
        type: pandas DataFrame
    db_table: The name of the database table
        type: str
    d_types: The datatypes of each column
        type: dict
        default: None
    index: Whether or not to use index
        type: bool
        default: False
    chunk_size: Number of rows to send at a time
        type: int
        default: None
    returns: Nothing, pushes data to database table

    :Dependencies:
    Python3
    sqlalchemy

    :Example:
    to_db_new(df, 'Waze')
    """

    import psycopg2
    import traceback
    from pandas.api.types import is_numeric_dtype
    from generic.get_params import get_params

    params = get_params(*args, **kwargs)

    dd = df.copy()

    if index:
        df.reset_index(inplace=True)

    cols = dd.columns

    for c in cols:
        if is_numeric_dtype(dd[c]):
            dd[c] = dd[c].astype(str)
        else:
            dd[c] = dd[c].astype(str)
            dd[c] = "'" + dd[c] + "'"

    dd = dd.to_numpy()

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(params['host'],
                                                                                     params['database'],
                                                                                     params['port'],
                                                                                     params['username'],
                                                                                     params['password'])

    try:
        # Connect to database
        conn = psycopg2.connect(conn_str)

        # Create new cursor
        cursor = conn.cursor()

        n = 0
        L = len(dd)
        cols = ', '.join(['"{0}"'.format(i) for i in cols])

        if chunk_size is None:
            chunk_size = L

        exists = check_if_table_exists(cursor, params['schema'], db_table)

        if not exists:
            create_table(df.head(0), db_table, params, d_types)

        command = '''INSERT INTO ''' + '{0}."{1}" ({2}) VALUES ('.format(params['schema'], db_table, cols)

        while n < L:
            # Chunk data and convert to string
            data = "), (".join([", ".join(i) for i in dd[n: n + chunk_size]]) + ')'

            # Execute command
            cursor.execute(command + data)

            n += chunk_size

        # Commit changes to database
        conn.commit()

        # Close connection to database
        cursor.close()

    except:
        print(traceback.format_exc())
    finally:
        # Make sure connection to database is closed
        if conn is not None:
            conn.close()
Ejemplo n.º 5
0
def load_data(db_table=None,
              start=None,
              end=None,
              date_col='date_time',
              chunk_size=100000,
              time_zone='America/New_York',
              *args,
              **kwargs):
    """
    :Description:
    Load data from database

    :Params:
    db_table: Database table name
        type: str
        default: None
    start: Start date[time]
        type: str
        default: None
        format: YYYY-MM-DD [HH:MM:SS]
    end: End date[time]
        type: str
        default: None
        format: YYYY-MM-DD [HH:MM:SS]
    date_col: Datetime column
        type: str
        default: date_time
    chunk_size: Number of rows to load at a time
        type: int
        default: 100000

    :Returns:
    df: Data from given table for specified time frame
        type: pandas DataFrame

    :Dependencies:
    Python3
    psycopg2

    :Notes:
    If start is not given, all data before end will be loaded
    If end is not given, all data after start will be loaded
    If neither start nor end are given, all data will be loaded

    :Example:
    load_data(db_table='some_table', start='2019-01-01')
    """

    import psycopg2
    import pandas as pd

    params = get_params(*args, **kwargs)

    base_command = '''SELECT * FROM ''' + '{0}."{1}"'.format(
        params['schema'], db_table)

    # Create the database query
    if start is None and end is None:
        pass
    elif start is None:
        base_command = base_command + ' WHERE "{1}" < \'{2} {3}\''.format(
            db_table, date_col, end, time_zone)
    elif end is None:
        base_command = base_command + ' WHERE "{1}" >= \'{2} {3}\''.format(
            db_table, date_col, start, time_zone)
    else:
        base_command = base_command + ' WHERE "{1}" >= \'{2} {4}\' AND "{1}" < \'{3} {4}\''.format(
            db_table, date_col, start, end, time_zone)

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(
        params['host'], params['database'], params['port'], params['username'],
        params['password'])

    # Connect to the database
    conn = psycopg2.connect(conn_str)
    cursor = conn.cursor()

    # Grab the first row from the database table
    # Needed to get the column names
    cursor.execute(base_command + ' LIMIT 1'.format(db_table))
    col_names = [desc[0] for desc in cursor.description]

    df = pd.DataFrame()

    # Iterate through the database rows
    # Needed if the query selection is too large to fit in the RAM
    n = 0
    while True:
        # Add LIMIT and OFFSET to the command
        command = base_command + " LIMIT " + str(
            chunk_size) + " OFFSET " + str(n * chunk_size)

        # Execute the command
        cursor.execute(command)
        records = cursor.fetchall()

        temp = pd.DataFrame(records)

        # Breaks when finished
        try:
            temp.columns = col_names
        except ValueError:
            break

        df = df.append(temp)

        n += 1

    del temp

    # Convert date column to datetime format
    try:
        df[date_col] = pd.to_datetime(df[date_col], infer_datetime_format=True)
    except:
        pass

    if df.empty:
        df = pd.DataFrame(columns=col_names)

    return df
Ejemplo n.º 6
0
def drop_from_db(db_table=None, date_col='date_time', pre_date=None, post_date=None, *args, **kwargs):
    """
    :Description:
    Drops all entries between the start/end dates from the specified database table

    :Params:
    db_table: The name of the database table
        type: str
        default: None
    date_col: The name of the date column
        type: str
        default: None
    pre_date: The starting date
        type: str
        format: YYYY-MM-DD
        default: None
    post_date: The ending date
        type: str
        format: YYYY-MM-DD
        default: None
    returns: Nothing

    :Dependencies:
    Python3
    psycopg2

    :Notes:
    Will grab all data if start/end dates are not specified

    :Example:
    drop_from_db(db_table='Waze', date_col='Date_Time', start='2018-01-01', end='2018-02-01')
    """

    import psycopg2

    params = get_params(*args, **kwargs)

    print('{0}\n{1}\t-\t{2}\n'.format(db_table, pre_date, post_date))

    # Build command
    if (pre_date is None) and (post_date is None):
        command = '''DELETE FROM ''' + '{0}."{1}"'.format(params['schema'], db_table)
    elif pre_date is None:
        command = '''DELETE FROM ''' + '{0}."{1}" WHERE "{2}" < \'{3}\''.format(params['schema'], db_table, date_col,
                                                                                post_date)
    elif post_date is None:
        command = '''DELETE FROM ''' + '{0}."{1}" WHERE "{2}" >= \'{3}\''.format(params['schema'], db_table, date_col,
                                                                                 pre_date)
    else:
        command = '''DELETE FROM ''' + '{0}."{1}" WHERE "{2}" >= \'{3}\' AND "{2}" < \'{4}\''.format(params['schema'],
                                                                                                     db_table,
                                                                                                     date_col,
                                                                                                     pre_date,
                                                                                                     post_date)

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(params['host'],
                                                                                     params['database'],
                                                                                     params['port'],
                                                                                     params['username'],
                                                                                     params['password'])

    # Connect to database
    conn = psycopg2.connect(conn_str)
    cursor = conn.cursor()

    # Execute command
    cursor.execute(command)
    conn.commit()
Ejemplo n.º 7
0
def update_internal_status_db(script,
                              location,
                              flag,
                              description,
                              date=None,
                              db_table='internal_status_dashboard',
                              *args,
                              **kwargs):
    """
    :Description:
    Pushes new status to the internal status dashboard database table
    Removes old status from the table

    :Params:
    script: Name of the script
        type: str
    location: Location of the script
        type: str
    flag: Did the script trip a flag?
        type: bool
    description: Description of the flag
        type: str
    date: Date/time that the script was run
        type: str
        default: None
    db_table: Name of the database table
        type: str
        default: internal_status_dashboard
    date_col: Name of the date column
        type: str
        default: date_time
    returns: Nothing, updates the internal status dashboard database table

    :Dependencies:
    Python3
    pandas

    :Example:
    update_internal_status_db('automation/waze/some_script', 'VM1', True, 'Something went wrong...script failed')
    """

    from time import gmtime, strftime
    from datetime import datetime
    from psycopg2 import connect

    params = get_params(*args, **kwargs)

    # Grab the current UTC date
    if date is None:
        date = gmtime()
        date = strftime('%Y-%m-%d %H:%M:%S', date)
        date = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')

    print(date)

    # Convert the flag to a string
    if flag:
        flag = 'FLAG'
    else:
        flag = ''

    # Assemble the database metadata
    conn_str = "host='{0}' dbname='{1}' port='{2}' user='******' password='******'".format(
        params['host'], params['database'], params['port'], params['username'],
        params['password'])

    # Connect to database
    conn = connect(conn_str)
    cursor = conn.cursor()

    command = '''SELECT COUNT(*) FROM ''' + \
              "{0}.\"{1}\" WHERE \"location\" LIKE '{2}' AND script LIKE '{3}'".format(params['schema'], db_table,
                                                                                       location, script)

    cursor.execute(command)
    conn.commit()

    records = cursor.fetchall()
    num = records[0][0]

    # Will fail if script doesn't exist in table yet
    if num == 1:
        # Update flag
        command = '''UPDATE ''' + '{0}."{1}" SET "flag"'.format(params['schema'], db_table) + \
                  " = '{0}' WHERE \"location\" LIKE '{1}' AND script LIKE '{2}'".format(flag, location, script)

        cursor.execute(command)
        conn.commit()

        # Update description
        command = '''UPDATE ''' + '{0}."{1}" SET "description"'.format(params['schema'], db_table) + \
                  " = '{0}' WHERE \"location\" LIKE '{1}' AND script LIKE '{2}'".format(description, location, script)

        cursor.execute(command)
        conn.commit()

        # Update date_time
        command = '''UPDATE ''' + '{0}."{1}" SET "date_time"'.format(params['schema'], db_table) + \
                  " = '{0}' WHERE \"location\" LIKE '{1}' AND script LIKE '{2}'".format(date, location, script)

        # Execute command
        cursor.execute(command)
        conn.commit()

    elif num == 0:
        print('script doesnt exist yet')
        from generic.to_db import to_db
        import pandas as pd

        df = pd.DataFrame(
            {
                'script': script,
                'date_time': date,
                'location': location,
                'flag': flag,
                'description': description
            },
            index=[0])

        # Push new status to database
        to_db(df, db_table)

    conn.close()