def update_step_data_with_step_pv_output(step_configuration):
    """
    Author       : Elinor Thorne / Nassir Mohammad
    Date         : July 2018
    Purpose      : Copies the process variable columns from the step's pv
                 : table onto the step's data table (rows matched on REC_ID)
                 : and then empties the temporary tables.
    Parameters   : step_configuration - mapping read for the keys
                 : "pv_columns2", "pv_table", "data_table", "temp_table"
                 : and "sas_ps_table".
    Returns      : NA
    """

    # One "SSS.<col> = CALC.<col>" assignment per configured column, with any
    # single quotes stripped from the configured name first.
    assignments = []
    for raw_name in step_configuration["pv_columns2"]:
        column = raw_name.replace("'", "")
        assignments.append("SSS." + column + " = CALC." + column)
    set_statement = ", ".join(assignments)

    pv_table = step_configuration["pv_table"]
    data_table = step_configuration["data_table"]

    sql = (
        f" UPDATE {data_table} as SSS, {pv_table} as CALC"
        f" SET {set_statement}"
        " WHERE SSS.REC_ID = CALC.REC_ID "
    )
    db.execute_sql_statement(sql)

    # Cleanse temporary tables (same order as before: pv, temp, process
    # variables, summary).
    purge = db.delete_from_table
    purge(pv_table)
    purge(step_configuration["temp_table"])
    purge(SAS_PROCESS_VARIABLES_TABLE)
    purge(step_configuration["sas_ps_table"])
def import_unsampled(file_name, file_type, run_id):
    """
    Purpose      : Reads an unsampled CSV file, normalises it and replaces
                 : the rows stored for the run in UNSAMPLED_OOH_DATA.
    Parameters   : file_name - passed straight to pandas.read_csv.
                 : file_type - object whose ``value`` attribute replaces the
                 :             literal 'Unsampled' in DATA_SOURCE_ID.
                 : run_id - written to the RUN_ID column and used to delete
                 :          the rows previously stored for this run.
    Returns      : None. Exceptions raised by the delete or the insert are
                 : logged through log.error and not re-raised.
    """
    data_schema = unsampled_schema.get_schema()

    # Convert CSV to dataframe and stage
    dataframe = pd.read_csv(file_name, engine="python", dtype=data_schema)

    # Normalise headers: upper case, no embedded spaces.
    dataframe.columns = dataframe.columns.str.upper()
    dataframe.columns = dataframe.columns.str.replace(' ', '')
    dataframe["RUN_ID"] = run_id
    dataframe.rename(columns={"DATASOURCE": "DATA_SOURCE_ID"}, inplace=True)

    # Replace "REGION" values with 0 if not an expected value.
    # Assign the result back rather than calling replace(inplace=True) on
    # dataframe['REGION']: that chained form works on a temporary Series and
    # does not update the frame when pandas Copy-on-Write is active.
    dataframe['REGION'] = dataframe['REGION'].replace(
        ['None', "", ".", 'nan'], 0
    )

    datasource_id = file_type.value
    dataframe['DATA_SOURCE_ID'] = dataframe['DATA_SOURCE_ID'].replace(
        ['Unsampled'], datasource_id
    )

    sql = f"DELETE FROM UNSAMPLED_OOH_DATA WHERE RUN_ID = '{run_id}'"

    try:
        db.execute_sql_statement(sql)
        db.insert_dataframe_into_table('UNSAMPLED_OOH_DATA', dataframe)
    except Exception as err:
        log.error(
            f"Cannot insert unsampled_data dataframe into database: {err}")
        return None
def move_survey_subsample_to_sas_table(run_id, step_name):
    """
    Author       : Elinor Thorne
    Date         : Apr 2018
    Purpose      : Copies the COLUMNS_TO_MOVE columns of the run's rows from
                 : SURVEY_SUBSAMPLE_TABLE into SAS_SURVEY_SUBSAMPLE_TABLE,
                 : skipping serials starting with 9999 and filtering on
                 : RESPNSE.
    Parameters   : run_id - run whose rows are selected.
                 : step_name - "TRAFFIC_WEIGHT" and "UNSAMPLED_WEIGHT" select
                 :             RESPNSE 1-2, any other step RESPNSE 1-6.
    Returns      : NA
    """
    column_list = ','.join(COLUMNS_TO_MOVE)

    # Assign RESPNSE condition to step
    narrow_steps = ("TRAFFIC_WEIGHT", "UNSAMPLED_WEIGHT")
    response_range = (
        "BETWEEN 1 and 2" if step_name in narrow_steps else "BETWEEN 1 and 6"
    )

    sql = (
        f" INSERT INTO {SAS_SURVEY_SUBSAMPLE_TABLE} ({column_list})"
        f" (SELECT {column_list} FROM {SURVEY_SUBSAMPLE_TABLE}"
        f" WHERE RUN_ID = '{run_id}'"
        " AND SERIAL NOT LIKE '9999%%'"
        f" AND RESPNSE {response_range}) "
    )

    db.execute_sql_statement(sql)
def insert(**kwargs):
    """
    Purpose      : Builds an INSERT statement for ``table`` with one column
                 : per keyword argument, logs it at debug level and executes
                 : it.
    Parameters   : **kwargs - column name -> value. str values are wrapped in
                 :            double quotes, None is written as NULL and any
                 :            other value is written with str().
    Returns      : NA
    Raises       : ValueError - when no column is supplied.
    """
    if not kwargs:
        # Previously this produced the truncated statement "INSERT INTO t (".
        raise ValueError("insert() requires at least one column=value pair")

    def _literal(value):
        # Render a Python value the way it must appear in the VALUES list.
        if value is None:
            return "NULL"
        if isinstance(value, str):
            return '"' + value + '"'
        # str() so that numbers can be joined; the old multi-column path
        # concatenated the raw value and raised TypeError for non-strings.
        return str(value)

    # NOTE(review): values are interpolated into the statement, not bound as
    # parameters, and embedded double quotes are not escaped - only pass
    # trusted values.
    if len(kwargs) == 1:
        # Historical single-column layout, kept character for character.
        key, value = next(iter(kwargs.items()))
        val = f"INSERT INTO {table} ( {key}) VALUES({_literal(value)})"
    else:
        columns = ", ".join(kwargs)
        values = ", ".join(_literal(value) for value in kwargs.values())
        val = f"INSERT INTO {table} ({columns}) VALUES ({values}) "

    log.debug(val)
    db.execute_sql_statement(val)
def update_survey_data_with_step_pv_output(step_configuration):
    """
    Author       : Elinor Thorne / Nassir Mohammad
    Date         : Apr 2018
    Purpose      : Copies the process variable columns from the step's spv
                 : table onto SAS_SURVEY_SUBSAMPLE_TABLE (rows matched on
                 : SERIAL) and then empties the temporary tables.
    Parameters   : step_configuration - mapping read for the keys "spv_table",
                 : "pv_columns" and "name"; for the MINIMUMS_WEIGHT step also
                 : "temp_table" and "sas_ps_table".
    Returns      : NA
    """
    spv_table = step_configuration["spv_table"]

    # One "SSS.<col> = CALC.<col>" assignment per configured column, with any
    # single quotes stripped from the configured name first.
    assignments = []
    for raw_name in step_configuration["pv_columns"]:
        column = raw_name.replace("'", "")
        assignments.append("SSS." + column + " = CALC." + column)
    set_statement = ", ".join(assignments)

    sql = (
        f" UPDATE {SAS_SURVEY_SUBSAMPLE_TABLE} AS SSS, {spv_table} AS CALC"
        f" SET {set_statement}"
        " WHERE SSS.SERIAL = CALC.SERIAL "
    )
    db.execute_sql_statement(sql)

    # Cleanse temp tables
    db.delete_from_table(SAS_PROCESS_VARIABLES_TABLE)
    db.delete_from_table(spv_table)

    # Everything below is specific to the minimums weight step.
    # TODO: consider moving this out to another function called by minimum weight
    if step_configuration["name"] != "MINIMUMS_WEIGHT":
        return

    db.delete_from_table(step_configuration["temp_table"])
    db.delete_from_table(step_configuration["sas_ps_table"])
def copy_step_pvs_for_survey_data(run_id, step_configuration):
    """
    Author       : Elinor Thorne
    Date         : April 2018
    Purpose      : Empties SAS_PROCESS_VARIABLES_TABLE and the step's spv
                 : table, then copies the run's definition of each configured
                 : process variable from PROCESS_VARIABLE_PY into
                 : SAS_PROCESS_VARIABLES_TABLE, numbering them 1, 2, 3, ...
                 : in configuration order.
    Parameters   : run_id - run whose process variable definitions are copied.
                 : step_configuration - mapping read for the keys "spv_table"
                 :                      and "pv_columns".
    Returns      : NA
    """
    spv_table = step_configuration["spv_table"]

    # Cleanse tables
    db.delete_from_table(SAS_PROCESS_VARIABLES_TABLE)
    db.delete_from_table(spv_table)

    # One INSERT per process variable; PROCVAR_ORDER is its 1-based position.
    for order, item in enumerate(step_configuration["pv_columns"], start=1):
        sql = (
            f" INSERT INTO {SAS_PROCESS_VARIABLES_TABLE}"
            " (PROCVAR_NAME, PROCVAR_RULE, PROCVAR_ORDER)"
            f"(SELECT PV.PV_NAME, PV.PV_DEF, {order}"
            " FROM PROCESS_VARIABLE_PY AS PV"
            f" WHERE PV.RUN_ID = '{run_id}'"
            f" AND UPPER(PV.PV_NAME) IN ('{item}')) "
        )
        db.execute_sql_statement(sql)
def populate_step_data(run_id, step_configuration):
    """
    Author       : Elinor Thorne
    Date         : April 2018
    Purpose      : Empties the step's data table and refills it with the
                 : configured columns of the run's rows from the step's
                 : source table.
    Parameters   : run_id - run whose rows are selected.
                 : step_configuration - mapping read for the keys
                 :                      "table_name", "data_table" and
                 :                      "insert_to_populate".
    Returns      : NA
    """
    source_table = step_configuration["table_name"]
    data_table = step_configuration["data_table"]
    columns = step_configuration["insert_to_populate"]

    # Target column list, then the same columns qualified with the CALC alias.
    insert_columns = ", ".join(str(name) for name in columns)
    select_columns = ", ".join(["CALC." + name for name in columns])

    # Cleanse temp table
    db.delete_from_table(data_table)

    sql = (
        f" INSERT INTO {data_table} ({insert_columns})"
        f" SELECT {select_columns}"
        f" FROM {source_table} AS CALC"
        f" WHERE RUN_ID = '{run_id}' "
    )
    db.execute_sql_statement(sql)
def import_non_response(file_name, file_type, run_id):
    """
    Purpose      : Reads a non-response CSV file, normalises it and replaces
                 : the rows stored for the run in NON_RESPONSE_DATA.
    Parameters   : file_name - passed straight to pandas.read_csv.
                 : file_type - object whose ``value`` attribute replaces the
                 :             literal 'Non Response' in DATA_SOURCE_ID.
                 : run_id - written to the RUN_ID column and used to delete
                 :          the rows previously stored for this run.
    Returns      : None. Exceptions raised by the delete or the insert are
                 : logged through log.error and not re-raised.
    """
    data_schema = non_response_schema.get_schema()

    # Convert CSV to dataframe and stage
    dataframe = pd.read_csv(file_name, engine="python", dtype=data_schema)

    # Normalise headers: upper case, no embedded spaces.
    dataframe.columns = dataframe.columns.str.upper()
    dataframe.columns = dataframe.columns.str.replace(' ', '')
    dataframe["RUN_ID"] = run_id
    dataframe.rename(columns={"DATASOURCE": "DATA_SOURCE_ID"}, inplace=True)

    datasource_id = file_type.value
    # Assign the result back rather than calling replace(inplace=True) on
    # dataframe['DATA_SOURCE_ID']: that chained form works on a temporary
    # Series and does not update the frame when pandas Copy-on-Write is
    # active.
    dataframe['DATA_SOURCE_ID'] = dataframe['DATA_SOURCE_ID'].replace(
        ['Non Response'], datasource_id
    )

    sql = f" DELETE FROM NON_RESPONSE_DATA WHERE RUN_ID = '{run_id}' "

    try:
        db.execute_sql_statement(sql)
        db.insert_dataframe_into_table('NON_RESPONSE_DATA', dataframe)
    except Exception as err:
        log.error(f"Cannot insert non_response dataframe into table: {err}")
        return None
def update_others(table):
    """
    Purpose      : Copies SPEND from ``table`` onto the rows of
                 : SAS_SURVEY_SUBSAMPLE_TABLE with the same SERIAL, for rows
                 : of ``table`` whose SPEND is zero or more.
    Parameters   : table - name of the table holding the SPEND values.
    Returns      : NA
    """
    target = SAS_SURVEY_SUBSAMPLE_TABLE

    statement = (
        f" UPDATE {target}, {table}"
        f" SET {target}.SPEND = {table}.SPEND"
        f" WHERE ({target}.SERIAL = {table}.SERIAL)"
        f" AND {table}.SPEND >=0 "
    )

    db.execute_sql_statement(statement)
def update_imbalance_weights(table, results_columns):
    """
    Purpose      : Executes the statement built by sql_update_statement for
                 : ``table`` / ``results_columns``, then sets IMBAL_WT to
                 : 1.00 on every SAS_SURVEY_SUBSAMPLE_TABLE row where it is
                 : still NULL.
    Parameters   : table - passed to sql_update_statement.
                 : results_columns - passed to sql_update_statement.
    Returns      : NA
    """
    statements = (
        sql_update_statement(table, results_columns),
        f" UPDATE {SAS_SURVEY_SUBSAMPLE_TABLE}"
        " SET IMBAL_WT = 1.00"
        " WHERE IMBAL_WT IS NULL ",
    )

    # Both statements are built before either one runs.
    for statement in statements:
        db.execute_sql_statement(statement)
def update_stay_imputation(table, results_columns):
    """
    Purpose      : Executes the statement built by sql_update_statement for
                 : ``table`` / ``results_columns``, then sets STAY to
                 : NUMNIGHTS on every SAS_SURVEY_SUBSAMPLE row whose SERIAL
                 : is not present in SAS_STAY_IMP.
    Parameters   : table - passed to sql_update_statement.
                 : results_columns - passed to sql_update_statement.
    Returns      : NA
    """
    imputed_update = sql_update_statement(table, results_columns)

    fallback_update = (
        " update SAS_SURVEY_SUBSAMPLE"
        " SET STAY = NUMNIGHTS"
        " WHERE SERIAL NOT IN (SELECT SERIAL FROM SAS_STAY_IMP) "
    )

    for statement in (imputed_update, fallback_update):
        db.execute_sql_statement(statement)
def update_spend_imputation(table, results_columns):
    """
    Purpose      : Sets SPEND on SAS_SURVEY_SUBSAMPLE from NEWSPEND in
                 : SAS_SPEND_IMP (rows matched on SERIAL, only serials with
                 : NEWSPEND of zero or more), then executes the statement
                 : built by sql_update_statement for ``table`` /
                 : ``results_columns``.
    Parameters   : table - passed to sql_update_statement.
                 : results_columns - passed to sql_update_statement.
    Returns      : NA
    """
    spend_update = (
        " UPDATE SAS_SURVEY_SUBSAMPLE AS SSS, SAS_SPEND_IMP AS SSI"
        " SET SSS.SPEND = SSI.NEWSPEND"
        " WHERE SSS.SERIAL = SSI.SERIAL"
        " AND SSS.SERIAL IN"
        " (SELECT SERIAL from SAS_SPEND_IMP where NEWSPEND >= 0) "
    )

    # Built before anything is executed, exactly as before.
    results_update = sql_update_statement(table, results_columns)

    for statement in (spend_update, results_update):
        db.execute_sql_statement(statement)
def is_valid_run_id(run_id: str) -> bool:
    """
    Purpose      : Reports whether SURVEY_SUBSAMPLE holds at least one row
                 : for the given run id.
    Parameters   : run_id - run id to look for.
    Returns      : True when the query yields a first row, otherwise False.
    """
    query = f"select run_id from SURVEY_SUBSAMPLE where run_id = '{run_id}'"
    first_row = db.execute_sql_statement(query).first()
    return first_row is not None
def store_survey_data_with_step_results(run_id, step_configuration):
    """
    Author       : Elinor Thorne
    Date         : April 2018
    Purpose      : Copies the step's result columns from
                 : SAS_SURVEY_SUBSAMPLE_TABLE back onto the run's rows in
                 : SURVEY_SUBSAMPLE_TABLE (rows matched on SERIAL), then
                 : cleanses the step's summary table (for the weighting
                 : steps) and SAS_SURVEY_SUBSAMPLE_TABLE.
    Parameters   : run_id - run whose rows are updated.
                 : step_configuration - mapping read for the keys "name" and
                 :                      "nullify_pvs"; for the weighting
                 :                      steps also "ps_table". It is not
                 :                      modified.
    Returns      : NA
    """
    step = step_configuration["name"]

    # Work on a copy: appending to the configured list itself would add
    # another "SPEND" entry to the caller's configuration on every call.
    cols = list(step_configuration["nullify_pvs"])

    # Add additional column to two steps
    if step in ("SPEND_IMPUTATION", "RAIL_IMPUTATION"):
        cols.append("SPEND")

    set_statement = " , ".join(
        "SS." + item + " = SSS." + item for item in cols
    )

    # Create SQL statement and execute
    sql = (
        f" UPDATE {SURVEY_SUBSAMPLE_TABLE} AS SS,"
        f" {SAS_SURVEY_SUBSAMPLE_TABLE} AS SSS"
        f" SET {set_statement}"
        " WHERE SS.SERIAL = SSS.SERIAL"
        f" AND SS.RUN_ID = '{run_id}' "
    )
    db.execute_sql_statement(sql)

    if os.getenv("POPULATE_TEST_DATA") == 'True':
        ctf.populate_test_data(SURVEY_SUBSAMPLE_TABLE, run_id,
                               step_configuration, dataset='survey')

    # Cleanse summary and subsample tables as applicable
    ps_tables_to_delete = [
        "SHIFT_WEIGHT",
        "NON_RESPONSE",
        "MINIMUMS_WEIGHT",
        "TRAFFIC_WEIGHT",
        "UNSAMPLED_WEIGHT",
        "IMBALANCE_WEIGHT",
        "FINAL_WEIGHT",
    ]

    if step in ps_tables_to_delete:
        db.delete_from_table(step_configuration["ps_table"],
                             "RUN_ID", "=", run_id)

    db.delete_from_table(SAS_SURVEY_SUBSAMPLE_TABLE)
def copy_step_pvs_for_step_data(run_id, step_configuration):
    """
    Author       : Elinor Thorne / Nassir Mohammad
    Date         : July 2018
    Purpose      : Empties SAS_PROCESS_VARIABLES_TABLE and the step's pv
                 : table, then copies the run's definitions of the configured
                 : process variables from PROCESS_VARIABLE_PY into
                 : SAS_PROCESS_VARIABLES_TABLE. UNSAMPLED_WEIGHT inserts one
                 : variable at a time with an increasing order starting at
                 : the configured order + 1; every other step inserts all
                 : variables in one statement using the configured order.
    Parameters   : run_id - run whose process variable definitions are copied.
                 : step_configuration - mapping read for the keys "pv_table",
                 :                      "name", "order" and "pv_columns2".
    Returns      : NA
    """
    # Cleanse temp tables
    db.delete_from_table(SAS_PROCESS_VARIABLES_TABLE)
    db.delete_from_table(step_configuration["pv_table"])

    if step_configuration["name"] != 'UNSAMPLED_WEIGHT':
        # Single statement covering every configured variable.
        pv_columns = ", ".join(
            "'" + item + "'" for item in step_configuration["pv_columns2"]
        )
        shared_order = step_configuration["order"]
        sql = (
            f" INSERT INTO {SAS_PROCESS_VARIABLES_TABLE}"
            " (PROCVAR_NAME, PROCVAR_RULE, PROCVAR_ORDER)"
            f" (SELECT pv.PV_NAME, pv.PV_DEF, {shared_order}"
            " FROM PROCESS_VARIABLE_PY AS pv"
            f" WHERE pv.RUN_ID = '{run_id}'"
            f" AND UPPER(pv.PV_NAME) in ({pv_columns})) "
        )
        db.execute_sql_statement(sql)
        return

    # UNSAMPLED_WEIGHT: one statement per variable, each with its own order.
    order = step_configuration["order"] + 1
    for item in step_configuration["pv_columns2"]:
        sql = (
            f" INSERT INTO {SAS_PROCESS_VARIABLES_TABLE}"
            " (PROCVAR_NAME, PROCVAR_RULE, PROCVAR_ORDER)"
            f" (SELECT pv.PV_NAME, pv.PV_DEF, {order}"
            " FROM PROCESS_VARIABLE_PY AS pv"
            f" WHERE pv.RUN_ID = '{run_id}'"
            f" AND UPPER(pv.PV_NAME) in ('{item}')) "
        )
        db.execute_sql_statement(sql)
        order += 1
def delete(**kwargs):
    """
    Purpose      : Builds a DELETE statement for ``table``, logs it at debug
                 : level and executes it. Each keyword argument becomes a
                 : "column = value" condition; several conditions are
                 : combined with AND.
    Parameters   : **kwargs - column name -> value. str values are wrapped in
                 :            double quotes, other values are written as-is.
                 :            With no arguments the statement has no WHERE
                 :            clause.
    Returns      : NA
    """

    def _literal(value):
        # Render a Python value the way it must appear in the condition.
        if isinstance(value, str):
            return '"' + value + '"'
        return value

    # NOTE(review): values are interpolated into the statement, not bound as
    # parameters, and embedded double quotes are not escaped - only pass
    # trusted values.
    conditions = " AND ".join(
        f"{key} = {_literal(value)}" for key, value in kwargs.items()
    )

    val = f"DELETE FROM {table}"
    if conditions:
        # A single WHERE keyword; the old loop repeated it before every
        # condition ("WHERE a = 1 AND  WHERE b = 2"), which is not valid SQL.
        val += f" WHERE {conditions}"

    log.debug(val)
    db.execute_sql_statement(val)
def store_step_summary(run_id, step_configuration):
    """
    Author       : Elinor Thorne
    Date         : May 2018
    Purpose      : Replaces the run's rows in the step's summary table with
                 : the contents of the step's temporary summary table, then
                 : empties the temporary table.
    Parameters   : run_id - run the summary rows are stored under.
                 : step_configuration - mapping read for the keys "ps_table",
                 :                      "sas_ps_table" and "ps_columns".
    Returns      : NA
    """
    ps_table = step_configuration["ps_table"]
    sas_ps_table = step_configuration["sas_ps_table"]

    # Cleanse summary table as applicable
    db.delete_from_table(ps_table, "RUN_ID", "=", run_id)

    # RUN_ID is supplied as a literal in the SELECT, so it is left out of the
    # columns read from the temporary table.
    source_columns = []
    for name in step_configuration["ps_columns"]:
        if name != "RUN_ID":
            source_columns.append(name)

    columns = " , ".join(step_configuration["ps_columns"])
    selection = " , ".join(source_columns)

    sql = (
        f" INSERT INTO {ps_table} ({columns})"
        f" SELECT '{run_id}', {selection}"
        f" FROM {sas_ps_table} "
    )
    db.execute_sql_statement(sql)

    populate_requested = os.getenv("POPULATE_TEST_DATA") == 'True'
    if populate_requested:
        ctf.populate_test_data(ps_table, run_id, step_configuration,
                               dataset='summary')

    # Cleanse temporary summary table
    db.delete_from_table(sas_ps_table)
def nullify_survey_subsample_values(run_id: str, pv_values):
    """
    Author       : Elinor Thorne
    Date         : Apr 2018
    Purpose      : Sets the given columns to null on the run's rows in
                 : SURVEY_SUBSAMPLE_TABLE.
    Parameters   : run_id - run whose rows are updated.
                 : pv_values - iterable of column names to set to null.
    Returns      : NA
    """
    # "<column> = null" for each requested column
    assignments = ", ".join([name + " = null" for name in pv_values])

    statement = (
        f"UPDATE {SURVEY_SUBSAMPLE_TABLE}"
        f" SET {assignments}"
        f" WHERE RUN_ID = '{run_id}'"
    )

    db.execute_sql_statement(statement)
def get_identity() -> Tuple:
    """
    Purpose      : Runs "SELECT @@IDENTITY AS id" and returns the first row
                 : of the result.
    Parameters   : NA
    Returns      : whatever ``first()`` yields for that query.
    """
    result = db.execute_sql_statement('SELECT @@IDENTITY AS id')
    return result.first()
def update_green(table, results_columns):
    """
    Purpose      : Executes the statement built by sql_update_statement for
                 : ``table`` / ``results_columns``.
    Parameters   : table - passed to sql_update_statement.
                 : results_columns - passed to sql_update_statement.
    Returns      : NA
    """
    db.execute_sql_statement(sql_update_statement(table, results_columns))
def execute(sql: str):
    """
    Purpose      : Passes ``sql`` to db.execute_sql_statement.
    Parameters   : sql - statement text to execute.
    Returns      : whatever db.execute_sql_statement returns.
    """
    outcome = db.execute_sql_statement(sql)
    return outcome