Example #1
0
def db_ddf_limit_offset(db, table, columns, partitions, limit, offset):
    '''
    Load one limit/offset window of a sql table into a dask dataframe

    args
    ----
    db: database identifier, passed to dbx.connect_db
    table (str): database table
    columns: column selection applied to the loaded rows (df[columns])
    partitions (int): Number of dask partitions to use
    limit: value placed in the LIMIT clause of the query
    offset: value placed in the OFFSET clause of the query

    returns
    ----
    ddt (object): dask dataframe
    '''
    conn = dbx.connect_db(db)

    # NOTE(review): the statement is assembled by string formatting, so
    # table, limit and offset must come from trusted callers.
    query = "SELECT * FROM {} limit {} offset {};".format(
        table, limit, offset)
    df = pd.read_sql_query(query, conn)
    ddt = dd.from_pandas(df[columns], npartitions=partitions)

    print('table ' + table + ' loaded into dask dataframe')
    return ddt
Example #2
0
def select_sql_pd(db, table, fields, field, value):
    '''
    Select the rows of a table whose field contains value (LIKE '%value%')

    args
    ----
    db: database identifier, passed to dbx.connect_db
    table (str): database table
    fields (str, list or tuple): columns to select; a list/tuple of names
        is joined with ", ", a string is used as is
    field (str): column tested in the WHERE clause
    value: text to search for; converted with str() and wrapped in
        % wildcards

    returns
    ----
    df (object): pandas dataframe with the query result, or None when the
        query raised (the error is printed)
    '''
    conn = dbx.connect_db(db)

    # isinstance also covers tuples and list subclasses, which would
    # otherwise end up in the query as their Python repr.
    if isinstance(fields, (list, tuple)):
        fields = ", ".join(fields)

    pattern = "'%" + str(value) + "%'"

    # NOTE(review): the statement is assembled by string formatting, so
    # every argument must come from trusted callers. Binding `value` as a
    # query parameter would be safer, but the placeholder style depends on
    # the driver behind dbx.connect_db, which is not visible here.
    query = "SELECT %s FROM %s WHERE %s LIKE %s;" % (fields, table, field,
                                                     pattern)

    try:
        return pd.read_sql_query(query, conn)
    except Exception as e:
        # Best-effort lookup: report the problem and signal failure
        # explicitly instead of falling off the end of the function.
        print(e)
        return None
Example #3
0
def db_ddf(db, table, columns, partitions, chunksize, offset=0):
    '''
    Load big sql table into dask dataframe in chunks to prevent memory exhaustion

    args
    ----
    db (str): database to connect
    table (str): database table
    columns (list): list of table columns to retrieve
    partitions (int): Number of dask partitions to use
    chunksize (int): Number of rows to return in each iteration of the sql query (affects memory allocated)
    offset (int): Offset rows in query (needed for sql query iteration, default=0)

    returns
    ----
    final (object): dask dataframe

    raises
    ----
    ValueError: if chunksize is not greater than zero
    '''
    # With chunksize <= 0 the "short chunk" exit test below can never be
    # true, so the loop would never terminate.
    if chunksize <= 0:
        raise ValueError('chunksize must be greater than zero')

    conn = dbx.connect_db(db)

    # None marks "no chunk loaded yet". Comparing offset with 0 instead
    # would leave `final` unbound when the caller starts at a non-zero
    # offset.
    final = None

    while True:
        # NOTE(review): the query has no ORDER BY; paging with
        # limit/offset assumes the database returns rows in a stable
        # order between iterations - confirm for the target database.
        query = "SELECT * FROM {} limit {} offset {};".format(
            table, chunksize, offset)
        df = pd.read_sql_query(query, conn)

        if final is None:
            # First chunk (possibly empty) defines the result.
            final = dd.from_pandas(df[columns], npartitions=partitions)
        elif not df.empty:
            # An empty trailing chunk adds no rows, so it is skipped.
            ddt = dd.from_pandas(df[columns], npartitions=partitions)
            final = dd.concat([ddt, final], axis=0, interleave_partitions=True)

        offset += chunksize

        # A chunk shorter than requested means the table is exhausted.
        if df.shape[0] < chunksize:
            break

    print('table ' + table + ' loaded into dask dataframe')
    return final
Example #4
0
def df_db(db, table, df, mode, index):
    '''
    Write a dataframe into a database table

    args
    ----
    db: database identifier, passed to dbx.connect_db
    table: name of the destination table
    df (object): dataframe to store (its to_sql method is used)
    mode: forwarded to to_sql as if_exists
    index: forwarded to to_sql as index
    '''
    connection = dbx.connect_db(db)
    write_options = {'if_exists': mode, 'index': index}
    df.to_sql(table, connection, **write_options)
Example #5
0
def db_pd(db, table):
    '''
    Load a whole sql table into a pandas dataframe

    args
    ----
    db: database identifier, passed to dbx.connect_db
    table (str): database table

    returns
    ----
    (object): pandas dataframe with every row and column of the table
    '''
    connection = dbx.connect_db(db)
    statement = "".join(("select * from ", table, ";"))
    return pd.read_sql_query(statement, connection)