Beispiel #1
0
import os

from data_readiness.read_db import get_dfs
from data_readiness.label_df_cleaning import join_to_main_df as img_join
from data_readiness.text_preprocess import nlp_join, get_nlp_df
from data_collection.misc import read_yaml

# Resolve the project configuration relative to this file: conf.yaml is
# looked up one directory above the directory containing this module.
folder = os.path.dirname(__file__)
config_file = os.path.join(folder, '..', 'conf.yaml')
config = read_yaml(config_file)
csv_path = config['csv_path']
# Join instead of concatenating so the result is correct whether or not the
# configured directory ends with a path separator.
data_path = os.path.join(csv_path, 'train.csv')

# Create the CSV output directory on first use. makedirs also creates any
# missing parent directories, and exist_ok avoids a crash if the directory
# appears between the existence check and the creation call.
if not os.path.exists(csv_path):
    os.makedirs(csv_path, exist_ok=True)
    print(f'Made path! {csv_path}')

# Allow price thresholding.
# NOTE(review): presumably (lower, upper) price bounds consumed by the
# filtering code below -- confirm ordering and units against its use.
price_range = (20, 100)

# Read in data from PSQL. get_dfs() returns three objects; only the first
# is kept here and the other two are discarded. This runs at import time.
df, _, _ = get_dfs()

# Collect NLP data from df using the imported get_nlp_df helper.
nlp_df = get_nlp_df(df)


def df_filtering(df, prices, img_opts='all', nlp=True, **kwargs):
    """Method that filters out main dataframe based
    on image criteria and joins img/nlp dataframes.
    Args:
Beispiel #2
0
import pandas as pd
import os

from data_collection.misc import read_yaml
from data_collection.req_to_db import psql_connect


# Locate the project configuration relative to this file: conf.yaml is read
# from one directory above the directory containing this module.
folder = os.path.dirname(__file__)
config_file = os.path.join(folder, '..', 'conf.yaml')
config = read_yaml(config_file)
# The secrets file path comes from the 'secrets' entry of the config and is
# resolved against that same parent directory.
secrets = read_yaml(os.path.join(folder, '..', config['secrets']))


def psql_to_pandas(query=''):
    """Run an arbitrary SQL query against the PSQL DB and load the result.
    Args:
        query: str, SQL statement handed to pandas as-is.
    Returns:
        pd.DataFrame built by pd.read_sql from the query result."""
    # A new connection is requested from psql_connect on every call, using
    # the module-level config and secrets.
    # NOTE(review): the connection is not closed in this function -- confirm
    # whether psql_connect (or its return type) manages its own lifetime.
    connection = psql_connect(config, secrets)
    frame = pd.read_sql(query, connection)
    return frame


def get_dfs():
    """Load the main, imgs and bids tables from PSQL, in that order.
    Returns:
        (pd.DataFrame,) * 3 -- one dataframe per table, ordered
        (main, imgs, bids)."""
    # One full-table query per table; the tuple order fixes the return order.
    table_queries = (
        'SELECT * FROM main;',
        'SELECT * FROM imgs;',
        'SELECT * FROM bids;',
    )
    return tuple(psql_to_pandas(sql) for sql in table_queries)