def imbalance_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 30 April 2018 / 2 October 2018 Purpose : Runs the imbalance weight steps of the ips process Params : run_id - the id for the current run. connection - a connection object pointing at the database. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_imbalance_weight() # Populate Survey Data For Imbalance Wt idm.populate_survey_data_for_step(run_id, config) # Copy Imbalance Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Imbalance Wt PVs On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_IMBALANCE_SPV', in_id='serial') # Update Survey Data With Imbalance Wt PVs Output idm.update_survey_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) # Calculate Imbalance Weight survey_data_out, summary_data_out = \ calculate_imb_weight.do_ips_imbweight_calculation(survey_data, serial="SERIAL", shift_weight="SHIFT_WT", non_response_weight="NON_RESPONSE_WT", min_weight="MINS_WT", traffic_weight="TRAFFIC_WT", oo_weight="UNSAMP_TRAFFIC_WT", imbalance_weight="IMBAL_WT") # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], survey_data_out) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data_out) # Update Survey Data With Imbalance Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Imbalance Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Imbalance Weight Summary idm.store_step_summary(run_id, config)
def minimums_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 30 April 2018 / 2 October 2018 Purpose : Runs the minimums weight steps of the ips process Params : run_id - the id for the current run. connection - a connection object pointing at the database. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_minimums_weight() # Populate Survey Data For Minimums Wt idm.populate_survey_data_for_step(run_id, config) # Copy Minimums Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Minimums Wt PVs On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_MINIMUMS_SPV', in_id='serial') # Update Survey Data with Minimums Wt PVs Output idm.update_survey_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) # Calculate Minimums Weight output_data, summary_data = \ calculate_minimums_weight.do_ips_minweight_calculation(df_surveydata=survey_data, serial_num='SERIAL', shift_weight='SHIFT_WT', nr_weight='NON_RESPONSE_WT', min_weight='MINS_WT') # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], output_data) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data) # Update Survey Data With Minimums Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Minimums Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Minimums Wt Summary idm.store_step_summary(run_id, config)
def final_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 30 April 2018 / 2 October 2018 Purpose : Runs the final weight steps of the ips process Params : run_id - the id for the current run. connection - a connection object pointing at the database. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_final_weight() # Populate Survey Data For Final Wt idm.populate_survey_data_for_step(run_id, config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) # Calculate Final Weight survey_data_out, summary_data_out = \ calculate_final_weight.do_ips_final_wt_calculation(survey_data, serial_num='SERIAL', shift_weight='SHIFT_WT', non_response_weight='NON_RESPONSE_WT', min_weight='MINS_WT', traffic_weight='TRAFFIC_WT', unsampled_weight='UNSAMP_TRAFFIC_WT', imbalance_weight='IMBAL_WT', final_weight='FINAL_WT') # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], survey_data_out) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data_out) # Update Survey Data With Final Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Final Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Final Weight Summary idm.store_step_summary(run_id, config)
def non_response_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 26 April 2018 / 2 October 2018 Purpose : Runs the non response weight steps of the ips process Params : run_id - the id for the current run. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_non_response() # Populate Survey Data For Non Response Wt idm.populate_survey_data_for_step(run_id, config) # Populate Non Response Data idm.populate_step_data(run_id, config) # Copy Non Response Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Non Response Wt PVs On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_NON_RESPONSE_SPV', in_id='serial') # Update Survey Data with Non Response Wt PVs Output idm.update_survey_data_with_step_pv_output(config) # Copy Non Response Wt PVs for Non Response Data idm.copy_step_pvs_for_step_data(run_id, config) # Apply Non Response Wt PVs On Non Response Data process_variables.process(dataset='non_response', in_table_name='SAS_NON_RESPONSE_DATA', out_table_name='SAS_NON_RESPONSE_PV', in_id='REC_ID') # Update NonResponse Data With PVs Output idm.update_step_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) non_response_data = db.get_table_values(config["data_table"]) # Calculate Non Response Weight survey_data_out, summary_data_out = \ calculate_nonresponse_weight.do_ips_nrweight_calculation(survey_data, non_response_data, 'NON_RESPONSE_WT', 'SERIAL') db.insert_dataframe_into_table(config["temp_table"], survey_data_out) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data_out) # Update Survey Data With Non Response Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With NonResponse Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Non Response Wt Summary idm.store_step_summary(run_id, config)
def traffic_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 30 April 2018 / 2 October 2018 Purpose : Runs the traffic weight steps of the ips process Params : run_id - the id for the current run. connection - a connection object pointing at the database. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_traffic_weight() # Populate Survey Data For Traffic Wt idm.populate_survey_data_for_step(run_id, config) # Populate Traffic Data idm.populate_step_data(run_id, config) # Copy Traffic Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Traffic Wt PV On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_TRAFFIC_SPV', in_id='serial') # Update Survey Data with Traffic Wt PV Output idm.update_survey_data_with_step_pv_output(config) # Copy Traffic Wt PVs For Traffic Data idm.copy_step_pvs_for_step_data(run_id, config) # Apply Traffic Wt PV On Traffic Data process_variables.process(dataset='traffic', in_table_name='SAS_TRAFFIC_DATA', out_table_name='SAS_TRAFFIC_PV', in_id='REC_ID') # Update Traffic Data With Traffic Wt PV Output idm.update_step_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) traffic_data = db.get_table_values(config["data_table"]) # Calculate Traffic Weight output_data, summary_data = do_ips_trafweight_calculation_with_R( survey_data, traffic_data) # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], output_data) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data) # Update Survey Data With Traffic Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Traffic Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Traffic Wt Summary idm.store_step_summary(run_id, config)
def shift_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 26 April 2018 / 2 October 2018 Purpose : Runs the shift weight steps of the ips process Params : run_id - the id for the current run. connection - a connection object pointing at the database. Returns : NA """ # Load configuration variables config = ServicesConfiguration().get_shift_weight() # Populate Survey Data For Shift Wt idm.populate_survey_data_for_step(run_id, config) # Populate Shift Data idm.populate_step_data(run_id, config) # Copy Shift Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Shift Wt PVs On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_SHIFT_SPV', in_id='serial') # Update Survey Data with Shift Wt PV Output idm.update_survey_data_with_step_pv_output(config) # Copy Shift Wt PVs For Shift Data idm.copy_step_pvs_for_step_data(run_id, config) # Apply Shift Wt PVs On Shift Data process_variables.process(dataset='shift', in_table_name='SAS_SHIFT_DATA', out_table_name='SAS_SHIFT_PV', in_id='REC_ID') # Update Shift Data with PVs Output idm.update_step_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) shift_data = db.get_table_values(config["data_table"]) # shift_data = sas_shift_schema.convert_dtype(shift_data) # Calculate Shift Weight survey_data_out, summary_data_out = \ calculate_shift_weight.do_ips_shift_weight_calculation(survey_data, shift_data, serial_number='SERIAL', shift_weight='SHIFT_WT') # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], survey_data_out) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data_out) # Update Survey Data With Shift Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Shift Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Shift Wt Summary idm.store_step_summary(run_id, config)
def unsampled_weight_step(run_id): """ Author : Thomas Mahoney / Elinor Thorne Date : 30 April 2018 / 2 October 2018 Purpose : Runs the unsampled weight steps of the ips process Params : run_id - the id for the current run. Returns : None """ # Load configuration variables config = ServicesConfiguration().get_unsampled_weight() # Populate Survey Data For Unsampled Wt idm.populate_survey_data_for_step(run_id, config) # Populate Unsampled Data idm.populate_step_data(run_id, config) # Copy Unsampled Wt PVs For Survey Data idm.copy_step_pvs_for_survey_data(run_id, config) # Apply Unsampled Wt PV On Survey Data process_variables.process(dataset='survey', in_table_name='SAS_SURVEY_SUBSAMPLE', out_table_name='SAS_UNSAMPLED_OOH_SPV', in_id='serial') # Update Survey Data with Unsampled Wt PV Output idm.update_survey_data_with_step_pv_output(config) # Copy Unsampled Wt PVs For Unsampled Data idm.copy_step_pvs_for_step_data(run_id, config) # Apply Unsampled Wt PV On Unsampled Data process_variables.process(dataset='unsampled', in_table_name='SAS_UNSAMPLED_OOH_DATA', out_table_name='SAS_UNSAMPLED_OOH_PV', in_id='REC_ID') # Update Unsampled Data With PV Output idm.update_step_data_with_step_pv_output(config) # Retrieve data from SQL survey_data = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) unsampled_data = db.get_table_values(config["data_table"]) # Calculate Unsampled Weight output_data, summary_data = calculate_unsampled_weight.do_ips_unsampled_weight_calculation( df_surveydata=survey_data, serial_num='SERIAL', shift_weight='SHIFT_WT', nr_weight='NON_RESPONSE_WT', min_weight='MINS_WT', traffic_weight='TRAFFIC_WT', out_of_hours_weight="UNSAMP_TRAFFIC_WT", df_ustotals=unsampled_data, min_count_threshold=30) # Insert data to SQL db.insert_dataframe_into_table(config["temp_table"], output_data) db.insert_dataframe_into_table(config["sas_ps_table"], summary_data) # Update Survey Data With Unsampled Wt Results idm.update_survey_data_with_step_results(config) # Store Survey Data With Unsampled Wt Results idm.store_survey_data_with_step_results(run_id, config) # Store Unsampled Weight Summary idm.store_step_summary(run_id, config)
def test_store_step_summary(database_connection): # step_config and variables step_config = { "ps_table": "PS_SHIFT_DATA", "sas_ps_table": "SAS_PS_SHIFT_DATA", "ps_columns": [ "[RUN_ID]", "[SHIFT_PORT_GRP_PV]", "[ARRIVEDEPART]", "[WEEKDAY_END_PV]", "[AM_PM_NIGHT_PV]", "[MIGSI]", "[POSS_SHIFT_CROSS]", "[SAMP_SHIFT_CROSS]", "[MIN_SH_WT]", "[MEAN_SH_WT]", "[MAX_SH_WT]", "[COUNT_RESPS]", "[SUM_SH_WT]" ] } run_id = 'shift-wt-idm-test' folder = '/store_step_summary' # Set up test data/tables test_ps_data = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_sas_ps_shift_data_test_input.csv') db.insert_dataframe_into_table(step_config["sas_ps_table"], test_ps_data, database_connection) # Run function return results idm.store_step_summary(run_id, database_connection, step_config) sql = """ SELECT * FROM {} WHERE RUN_ID = '{}' """.format(step_config["ps_table"], run_id) results = pd.read_sql(sql, database_connection) results.to_csv(TEST_DATA_DIR + folder + '/shift_wt_actual_results.csv', index=False) # Get and format results results = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_actual_results.csv', dtype=object) test_results = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_expected_results.csv', dtype=object) results.sort_values(by=[ 'SHIFT_PORT_GRP_PV', 'ARRIVEDEPART', 'WEEKDAY_END_PV', 'AM_PM_NIGHT_PV' ], inplace=True) results.index = range(0, len(results)) test_results.sort_values(by=[ 'SHIFT_PORT_GRP_PV', 'ARRIVEDEPART', 'WEEKDAY_END_PV', 'AM_PM_NIGHT_PV' ], inplace=True) test_results.index = range(0, len(test_results)) assert_frame_equal(results, test_results, check_dtype=False) # Assert temp tables had been cleansed in function results = db.get_table_values(step_config['sas_ps_table']) assert len(results) == 0 # Cleanse test inputs db.delete_from_table(step_config['ps_table'], 'RUN_ID', '=', run_id)