import os from data_readiness.read_db import get_dfs from data_readiness.label_df_cleaning import join_to_main_df as img_join from data_readiness.text_preprocess import nlp_join, get_nlp_df from data_collection.misc import read_yaml folder = os.path.dirname(__file__) config_file = os.path.join(folder, '..', 'conf.yaml') config = read_yaml(config_file) csv_path = config['csv_path'] data_path = csv_path + 'train.csv' if not os.path.exists(csv_path): os.mkdir(csv_path) print(f'Made path! {csv_path}') # Allow price thresholding price_range = (20, 100) # Read in data from PSQL df, _, _ = get_dfs() # Collect NLP data with above method. nlp_df = get_nlp_df(df) def df_filtering(df, prices, img_opts='all', nlp=True, **kwargs): """Method that filters out main dataframe based on image criteria and joins img/nlp dataframes. Args:
import os

import pandas as pd

from data_collection.misc import read_yaml
from data_collection.req_to_db import psql_connect

# Project configuration lives one directory above this module; the secrets
# file path is taken from the 'secrets' entry of that configuration.
folder = os.path.dirname(__file__)
config_file = os.path.join(folder, '..', 'conf.yaml')
config = read_yaml(config_file)
secrets = read_yaml(os.path.join(folder, '..', config['secrets']))


def psql_to_pandas(query=''):
    """Run an arbitrary SQL query against the PSQL DB via pandas.

    Args:
        query: str, SQL text handed unchanged to ``pd.read_sql``.

    Returns:
        pd.DataFrame holding the query result.
    """
    # A fresh connection is requested on every call.
    # NOTE(review): the connection is not explicitly closed here -- confirm
    # whether the object returned by psql_connect needs manual cleanup.
    connection = psql_connect(config, secrets)
    return pd.read_sql(query, connection)


def get_dfs():
    """Load the ``main``, ``imgs`` and ``bids`` tables from PSQL.

    Returns:
        (pd.DataFrame,) * 3, in the order main, imgs, bids.
    """
    table_queries = (
        'SELECT * FROM main;',
        'SELECT * FROM imgs;',
        'SELECT * FROM bids;',
    )
    # Queries are issued one after another, in the order listed above.
    return tuple(psql_to_pandas(sql) for sql in table_queries)