def test_copy_step_pvs_for_step_data_shift_weight(self, database_connection):
    """Integration test: idm.copy_step_pvs_for_step_data must copy only the
    SHIFT_DATA step's process variables from PROCESS_VARIABLE_PY into the
    SAS_PROCESS_VARIABLE table, and must leave the step's pv_table empty.

    Requires a live database connection; test data comes from a pickled
    fixture under COPY_PV_PATH.
    """
    # Step configuration for the SHIFT_DATA step. pv_columns keep the
    # embedded single quotes the production SQL expects.
    step_config = {
        'name': 'SHIFT_DATA',
        'pv_table': 'SAS_SHIFT_PV',
        'pv_columns': ["'SHIFT_PORT_GRP_PV'", "'WEEKDAY_END_PV'", "'AM_PM_NIGHT_PV'"],
        'order': 0
    }
    run_id = 'copy-step-pvs-for-step-data'

    # clean the tables before putting in data
    db.delete_from_table('PROCESS_VARIABLE_PY', 'RUN_ID', '=', run_id)
    db.delete_from_table(idm.SAS_PROCESS_VARIABLES_TABLE)

    # read test data and insert into remote database table
    test_data = pd.read_pickle(COPY_PV_PATH + 'copy_shift_weight_pvs_for_shift_data.pkl')
    db.insert_dataframe_into_table("PROCESS_VARIABLE_PY", test_data, database_connection)

    # run the test function (this inserts into 'SAS_PROCESS_VARIABLE' table in remote database)
    idm.copy_step_pvs_for_step_data(run_id, database_connection, step_config)

    # write the results back to csv, and read the csv back
    # (this solves the data type matching issues)
    results = db.get_table_values('SAS_PROCESS_VARIABLE')
    temp_output = COPY_PV_PATH + 'copy_shift_weight_pvs_for_shift_data_results.csv'
    results.to_csv(temp_output, index=False)
    results = pd.read_csv(temp_output)

    # remove the temporary file
    os.remove(temp_output)

    # from the test data make a dataframe of the expected results:
    # only the rows whose PV_NAME is one of this step's (quote-stripped) PVs
    pv_cols = [item.replace("'", "") for item in step_config['pv_columns']]
    test_inserted_data = test_data[test_data['PV_NAME'].isin(pv_cols)]
    test_inserted_data_2 = test_inserted_data[['PV_NAME', 'PV_DEF']]
    test_results = results[['PROCVAR_NAME', 'PROCVAR_RULE']]

    # check that the PROCVAR_NAME and PROCVAR_RULE strings match the ones
    # from test data for the required pvs only
    npt.assert_array_equal(test_inserted_data_2, test_results)

    # Assert step_configuration["pv_table"] has 0 records
    result = db.get_table_values(step_config['pv_table'])
    assert len(result) == 0

    # Cleanse tables before continuing
    db.delete_from_table(idm.SAS_PROCESS_VARIABLES_TABLE)
    db.delete_from_table('PROCESS_VARIABLE_PY', 'RUN_ID', '=', run_id)
def airmiles_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the air-miles calculation stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the air-miles stage
    step_cfg = ServicesConfiguration().get_air_miles()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Pull the staged survey data out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Run the air-miles calculation
    df_airmiles = calculate_airmiles.do_ips_airmiles_calculation(
        df_surveydata=df_survey, var_serial='SERIAL')

    # Persist the calculation output
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_airmiles)

    # Merge the results back into the survey data, then store them
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
def imbalance_weight_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the imbalance weight stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the imbalance-weight stage
    step_cfg = ServicesConfiguration().get_imbalance_weight()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_IMBALANCE_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Pull the staged survey data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Calculate the imbalance weight
    df_survey_out, df_summary_out = \
        calculate_imb_weight.do_ips_imbweight_calculation(df_survey,
                                                          serial="SERIAL",
                                                          shift_weight="SHIFT_WT",
                                                          non_response_weight="NON_RESPONSE_WT",
                                                          min_weight="MINS_WT",
                                                          traffic_weight="TRAFFIC_WT",
                                                          oo_weight="UNSAMP_TRAFFIC_WT",
                                                          imbalance_weight="IMBAL_WT")

    # Persist the calculation outputs
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)
    db.insert_dataframe_into_table(step_cfg["sas_ps_table"], df_summary_out)

    # Merge results into the survey data, store it, and store the summary
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
    idm.store_step_summary(run_id, step_cfg)
def process(in_table_name, out_table_name, in_id, dataset):
    """
    Author     : Thomas Mahoney
    Date       : 27 / 03 / 2018
    Purpose    : Runs the process variables step of the IPS calculation process.
    Parameters : in_table_name - the table where the data is coming from.
                 out_table_name - the destination table where the modified data
                                  will be sent.
                 in_id - the column id used in the output dataset (this is used
                         when the data is merged into the main table later).
                 dataset - an identifier for the dataset currently being
                           processed.
    Returns    : NA
    """
    # Ensure the input table name is capitalised
    in_table_name = in_table_name.upper()

    # Extract the table's content into a local dataframe
    df_data = db.get_table_values(in_table_name)

    # Normalise missing values to NaN (e.g. None in object columns)
    df_data.fillna(value=np.NaN, inplace=True)

    # Get the process variable statements
    process_variables = get_pvs()

    # Survey data is processed in SERIAL order
    if dataset == 'survey':
        df_data = df_data.sort_values('SERIAL')

    # Apply process variables row-wise
    df_data = df_data.apply(modify_values, axis=1, args=(process_variables, dataset))

    # Output columns: the id column plus every PV name, all upper-cased
    # (pv[0] is the PV's name — see get_pvs)
    updated_columns = [pv[0].upper() for pv in process_variables]
    columns = [col.upper() for col in [in_id] + updated_columns]

    # Create a new dataframe from the modified data using the columns specified
    df_out = df_data[columns]

    # Insert the dataframe to the output table
    db.insert_dataframe_into_table(out_table_name, df_out)
def minimums_weight_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the minimums weight stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the minimums-weight stage
    step_cfg = ServicesConfiguration().get_minimums_weight()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_MINIMUMS_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Pull the staged survey data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Calculate the minimums weight
    df_output, df_summary = \
        calculate_minimums_weight.do_ips_minweight_calculation(df_surveydata=df_survey,
                                                               serial_num='SERIAL',
                                                               shift_weight='SHIFT_WT',
                                                               nr_weight='NON_RESPONSE_WT',
                                                               min_weight='MINS_WT')

    # Persist the calculation outputs
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_output)
    db.insert_dataframe_into_table(step_cfg["sas_ps_table"], df_summary)

    # Merge results into the survey data, store it, and store the summary
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
    idm.store_step_summary(run_id, step_cfg)
def test_copy_step_pvs_for_survey_data(step_name, pv_columns, spv_table, database_connection):
    """Parameterised integration test for idm.copy_step_pvs_for_survey_data.

    The values for the arguments of this test function are taken from the
    parameters specified in pytest.mark.parametrize.
    """
    # Setup step configuration variables
    step_config = {
        'name': step_name,
        'spv_table': spv_table,
        'pv_columns': pv_columns
    }
    run_id = 'TEMPLATE'

    idm.copy_step_pvs_for_survey_data(run_id, database_connection, step_config)

    # Get all values from the sas_process_variables table
    results = db.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE)

    # Build a quote-stripped COPY of the PV names for the comparisons.
    # NB: do not mutate pv_columns in place — the list object supplied by
    # pytest.mark.parametrize is shared, so in-place edits would leak into
    # subsequent parameterised runs.
    pv_names = [name.replace("'", "") for name in step_config['pv_columns']]

    # Check number of PV records moved matches number passed in through step configuration.
    assert len(results) == len(step_config['pv_columns'])

    # Ensure the pv_names in the results data frame match the expected pv names
    for name in results['PROCVAR_NAME']:
        assert name.upper() in pv_names

    # Get the spv_table values and ensure all records have been deleted
    results = db.get_table_values(step_config['spv_table'])
    assert len(results) == 0
def rail_imputation_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the rail imputation stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the rail-imputation stage
    step_cfg = ServicesConfiguration().get_rail_imputation()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_RAIL_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Pull the staged survey data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Calculate the rail imputation
    df_survey_out = calculate_rail_imputation.do_ips_railex_imp(
        df_survey,
        var_serial='SERIAL',
        var_final_weight='FINAL_WT',
        minimum_count_threshold=30)

    # Persist the calculation output
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)

    # Merge the results back into the survey data, then store them
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
def fares_imputation_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the fares imputation stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the fares-imputation stage
    step_cfg = ServicesConfiguration().get_fares_imputation()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_FARES_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Pull the staged survey data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Calculate the fares imputation
    df_survey_out = calculate_fares_imputation.do_ips_fares_imputation(df_survey,
                                                                       var_serial='SERIAL',
                                                                       num_levels=9,
                                                                       measure='mean')

    # Persist the calculation output
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)

    # Merge the results back into the survey data, then store them
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
def town_stay_expenditure_imputation_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the town-stay expenditure (TSE) imputation stage
              of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the TSE-imputation stage
    step_cfg = ServicesConfiguration().get_town_and_stay_expenditure()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_TOWN_STAY_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Pull the staged survey data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Calculate the TSE imputation
    df_survey_out = calculate_town_and_stay_expenditure.do_ips_town_exp_imp(
        df_survey,
        var_serial="SERIAL",
        var_final_wt="FINAL_WT")

    # Persist the calculation output
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)

    # Merge the results back into the survey data, then store them
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
def final_weight_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the final weight stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the final-weight stage
    step_cfg = ServicesConfiguration().get_final_weight()

    # Stage the survey records for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)

    # Pull the staged survey data out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # Combine the step weights into the final weight
    df_survey_out, df_summary_out = \
        calculate_final_weight.do_ips_final_wt_calculation(df_survey,
                                                           serial_num='SERIAL',
                                                           shift_weight='SHIFT_WT',
                                                           non_response_weight='NON_RESPONSE_WT',
                                                           min_weight='MINS_WT',
                                                           traffic_weight='TRAFFIC_WT',
                                                           unsampled_weight='UNSAMP_TRAFFIC_WT',
                                                           imbalance_weight='IMBAL_WT',
                                                           final_weight='FINAL_WT')

    # Persist the calculation outputs
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)
    db.insert_dataframe_into_table(step_cfg["sas_ps_table"], df_summary_out)

    # Merge results into the survey data, store it, and store the summary
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
    idm.store_step_summary(run_id, step_cfg)
def do_ips_ges_weighting(df_surveydata: pd.DataFrame, df_ustotals: pd.DataFrame):
    """Run the unsampled-traffic GES weighting via the external R script.

    Clears the auxiliary/memory tables, stages the survey and population
    inputs for R, runs the R GES script, then reads the resulting weights
    back from the 'r_unsampled' table.

    Params  : df_surveydata - survey dataframe (sorted by SERIAL before use).
              df_ustotals - unsampled population totals dataframe.
    Returns : (df_surveydata, df_summarydata) - the SERIAL-sorted survey data
              and a frame of SERIAL / UNSAMP_TRAFFIC_WT produced by R.
    """
    # Deletes from poprowvec and survey_unsamp_aux tables
    db.delete_from_table('survey_unsamp_aux')
    # cf.drop_table('poprowvec_unsamp')
    # cf.drop_table('r_unsampled')
    db.clear_memory_table('poprowvec_unsamp')
    db.clear_memory_table('r_unsampled')

    # Call the GES weighting macro: stage inputs, then run the R script
    df_surveydata = df_surveydata.sort_values('SERIAL')
    r_survey_input(df_surveydata)
    r_population_input(df_surveydata, df_ustotals)
    run_r_ges_script()

    # Read back the R output; only serial and weight columns are returned
    df_summarydata = db.get_table_values('r_unsampled')
    df_summarydata = df_summarydata[['SERIAL', 'UNSAMP_TRAFFIC_WT']]

    return df_surveydata, df_summarydata
def non_response_weight_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 26 April 2018 / 2 October 2018
    Purpose : Orchestrates the non-response weight stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the non-response stage
    step_cfg = ServicesConfiguration().get_non_response()

    # Stage survey and non-response data for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)
    idm.populate_step_data(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_NON_RESPONSE_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Apply the step's process variables to the non-response data
    idm.copy_step_pvs_for_step_data(run_id, step_cfg)
    process_variables.process(dataset='non_response',
                              in_table_name='SAS_NON_RESPONSE_DATA',
                              out_table_name='SAS_NON_RESPONSE_PV',
                              in_id='REC_ID')
    idm.update_step_data_with_step_pv_output(step_cfg)

    # Pull the staged data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    df_non_response = db.get_table_values(step_cfg["data_table"])

    # Calculate the non-response weight
    df_survey_out, df_summary_out = \
        calculate_nonresponse_weight.do_ips_nrweight_calculation(df_survey,
                                                                 df_non_response,
                                                                 'NON_RESPONSE_WT',
                                                                 'SERIAL')

    # Persist the calculation outputs
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_survey_out)
    db.insert_dataframe_into_table(step_cfg["sas_ps_table"], df_summary_out)

    # Merge results into the survey data, store it, and store the summary
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
    idm.store_step_summary(run_id, step_cfg)
def read():
    # Fetch the full contents of `table` from the database.
    # NOTE(review): `table` is not defined in this function — presumably a
    # closure/module-level variable from the enclosing scope; confirm there.
    return db.get_table_values(table)
def test_update_step_data_with_step_pv_output(self, database_connection):
    """Integration test: idm.update_step_data_with_step_pv_output must merge
    the step's PV values into the step data table and cleanse the pv/temp/
    summary tables afterwards.
    """
    # step_config and variables
    step_config = {
        "pv_columns2": ["[SHIFT_PORT_GRP_PV]", "[WEEKDAY_END_PV]", "[AM_PM_NIGHT_PV]"],
        "pv_table": "SAS_SHIFT_PV",
        "data_table": "SAS_SHIFT_DATA",
        "temp_table": "SAS_SHIFT_WT",
        "sas_ps_table": "SAS_PS_SHIFT_DATA"
    }

    # Set up test data/tables
    test_shift_pv_data = pd.read_csv(
        UPDATE_STEP_DATA_WITH_STEP_PV_OUTPUT_PATH + 'test_shift_pv_data.csv')

    # Get rec_id and amend test dataframe so its REC_IDs line up with the
    # rows already present in the data table
    rec_id = self.get_rec_id("MAX", step_config["data_table"], database_connection)
    test_shift_pv_data = self.amend_rec_id(test_shift_pv_data, rec_id, ascend=False)
    db.insert_dataframe_into_table(step_config['pv_table'], test_shift_pv_data,
                                   database_connection)

    # run the test function
    idm.update_step_data_with_step_pv_output(database_connection, step_config)

    # write the results back to csv, and read the csv back
    # (this solves the data type matching issues)
    results = db.get_table_values(step_config['data_table'])
    temp_output = UPDATE_STEP_DATA_WITH_STEP_PV_OUTPUT_PATH + 'copy_update_step_data_with_step_pv_output.csv'
    results.to_csv(temp_output, index=False)
    results = pd.read_csv(temp_output)

    # get the unique REC_ID of the test_shift_pv_data
    rec_id = test_shift_pv_data["REC_ID"]

    # select all rows with matching updated rec_id
    results_1 = results[results['REC_ID'].isin(rec_id)]

    # create column list of pvs (strip the square brackets)
    cols_temp = [
        item.replace("[", "") for item in step_config['pv_columns2']
    ]
    cols_to_keep = [item.replace("]", "") for item in cols_temp]
    cols_to_keep.insert(0, "REC_ID")

    # keep only the required columns from results_1 and importantly reset index and drop it
    results_2 = results_1[cols_to_keep]
    results_3 = results_2.reset_index(drop=True)

    # sort rows in test_shift_pv_data by REC_ID and importantly reset index and drop it
    sorted_test_shift_pv_data_1 = test_shift_pv_data.sort_values(
        by=['REC_ID'])
    sorted_test_shift_pv_data_2 = sorted_test_shift_pv_data_1.reset_index(
        drop=True)

    # check that the two dataframes match
    assert_frame_equal(results_3, sorted_test_shift_pv_data_2,
                       check_names=False, check_like=True, check_dtype=False)

    # Assert temp tables had been cleansed in function
    results = db.get_table_values(step_config['pv_table'])
    assert len(results) == 0
    results = db.get_table_values(step_config['temp_table'])
    assert len(results) == 0
    results = db.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE)
    assert len(results) == 0
    results = db.get_table_values(step_config['sas_ps_table'])
    assert len(results) == 0
def test_update_survey_data_with_step_pv_output_with_name_minimums_weight(
        self, database_connection):
    """Integration test: idm.update_survey_data_with_step_pv_output, for the
    MINIMUMS_WEIGHT step, must copy the step's PV columns into the survey
    subsample, leave non-PV columns untouched, and cleanse its temp tables.
    """
    step_config = {
        'name': "MINIMUMS_WEIGHT",
        'spv_table': 'SAS_MINIMUMS_SPV',
        "pv_columns": [
            "'MINS_FLAG_PV'", "'MINS_PORT_GRP_PV'", "'MINS_CTRY_GRP_PV'",
            "'MINS_NAT_GRP_PV'", "'MINS_CTRY_PORT_GRP_PV'"
        ],
        "temp_table": "SAS_MINIMUMS_WT",
        "sas_ps_table": "SAS_PS_MINIMUMS",
    }
    run_id = 'update-survey-pvs'

    # delete the data in the table so that we have no data in table for test
    db.delete_from_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    db.delete_from_table(step_config['spv_table'])

    # read and insert into the database the survey data
    test_survey_data = pd.read_pickle(STEP_PV_OUTPUT_PATH + 'update_survey_data_pvs.pkl')
    db.insert_dataframe_into_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE, test_survey_data,
                                   database_connection)

    # read and insert into the database the pvs
    test_nr_pv_data = pd.read_csv(STEP_PV_OUTPUT_PATH + 'test_mw_pv_data.csv')
    db.insert_dataframe_into_table(step_config['spv_table'], test_nr_pv_data,
                                   database_connection)

    # call the test function
    idm.update_survey_data_with_step_pv_output(database_connection, step_config)

    # get the newly updated table data; write the results back to csv to read
    # back and resolve formatting
    results = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    # write the results back to csv, and read the csv back
    # (this solves the data type matching issues)
    temp_output = STEP_PV_OUTPUT_PATH + 'update_survey_data_pvs_result_results.csv'
    results.to_csv(temp_output, index=False)
    results = pd.read_csv(temp_output)

    # remove the temporary written file
    os.remove(temp_output)

    # clean test data before actually testing results
    db.delete_from_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    db.delete_from_table(step_config['spv_table'])

    # check ONLY updated pv columns are as expected in results,
    # check NaN values are handled correctly
    stripped_pv_cols = [
        item.replace("'", "") for item in step_config['pv_columns']
    ]
    stripped_pv_cols.insert(0, 'SERIAL')  # add the SERIAL column
    test_dummy_1 = results[stripped_pv_cols]

    # get the SERIAL column values as a list, and select rows from updated
    # data that match input data
    serials = test_nr_pv_data['SERIAL']
    test_dummy_2 = test_dummy_1[test_dummy_1['SERIAL'].isin(serials)]

    # clean test data before actually testing results
    # NOTE(review): this repeats the cleanup done above — presumably
    # redundant; confirm before removing
    db.delete_from_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    db.delete_from_table(step_config['spv_table'])

    # check updated pv columns match the corresponding dummy values
    assert_frame_equal(test_dummy_2, test_nr_pv_data, check_dtype=False,
                       check_like=True)

    # check that the non-pv column values are still the same by dropping pv columns
    columns_to_drop = [
        item.replace("'", "") for item in step_config['pv_columns']
    ]
    new_res = results.drop(columns_to_drop, axis=1)
    new_test_res = test_survey_data.drop(columns_to_drop, axis=1)
    assert_frame_equal(new_res, new_test_res, check_dtype=False, check_like=True)

    # check that spv_table has been deleted
    results_2 = db.get_table_values(step_config['spv_table'])
    assert len(results_2) == 0
    results = db.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE)
    assert len(results) == 0
    results = db.get_table_values(step_config["temp_table"])
    assert len(results) == 0
    results = db.get_table_values(step_config["sas_ps_table"])
    assert len(results) == 0
def test_store_step_summary(database_connection):
    """Integration test: idm.store_step_summary must copy the step's summary
    rows from the sas_ps_table into the run-keyed ps_table and cleanse the
    sas_ps_table afterwards. Compares against a CSV of expected results.
    """
    # step_config and variables
    step_config = {
        "ps_table": "PS_SHIFT_DATA",
        "sas_ps_table": "SAS_PS_SHIFT_DATA",
        "ps_columns": [
            "[RUN_ID]", "[SHIFT_PORT_GRP_PV]", "[ARRIVEDEPART]",
            "[WEEKDAY_END_PV]", "[AM_PM_NIGHT_PV]", "[MIGSI]",
            "[POSS_SHIFT_CROSS]", "[SAMP_SHIFT_CROSS]", "[MIN_SH_WT]",
            "[MEAN_SH_WT]", "[MAX_SH_WT]", "[COUNT_RESPS]", "[SUM_SH_WT]"
        ]
    }
    run_id = 'shift-wt-idm-test'
    folder = '/store_step_summary'

    # Set up test data/tables
    test_ps_data = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_sas_ps_shift_data_test_input.csv')
    db.insert_dataframe_into_table(step_config["sas_ps_table"], test_ps_data,
                                   database_connection)

    # Run function return results
    idm.store_step_summary(run_id, database_connection, step_config)
    sql = """
    SELECT * FROM {}
    WHERE RUN_ID = '{}'
    """.format(step_config["ps_table"], run_id)
    results = pd.read_sql(sql, database_connection)

    # round-trip through csv to normalise dtypes before comparing
    results.to_csv(TEST_DATA_DIR + folder + '/shift_wt_actual_results.csv', index=False)

    # Get and format results
    results = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_actual_results.csv',
                          dtype=object)
    test_results = pd.read_csv(TEST_DATA_DIR + folder + '/shift_wt_expected_results.csv',
                               dtype=object)

    # sort both frames by the same key columns and re-index so row order
    # cannot cause a spurious mismatch
    results.sort_values(by=[
        'SHIFT_PORT_GRP_PV', 'ARRIVEDEPART', 'WEEKDAY_END_PV', 'AM_PM_NIGHT_PV'
    ], inplace=True)
    results.index = range(0, len(results))
    test_results.sort_values(by=[
        'SHIFT_PORT_GRP_PV', 'ARRIVEDEPART', 'WEEKDAY_END_PV', 'AM_PM_NIGHT_PV'
    ], inplace=True)
    test_results.index = range(0, len(test_results))

    assert_frame_equal(results, test_results, check_dtype=False)

    # Assert temp tables had been cleansed in function
    results = db.get_table_values(step_config['sas_ps_table'])
    assert len(results) == 0

    # Cleanse test inputs
    db.delete_from_table(step_config['ps_table'], 'RUN_ID', '=', run_id)
def test_store_survey_data_with_step_results(step_name, nullify_pvs, ps_table,
                                             prefix, database_connection):
    """
    # This test is parameterised. The values for the arguments of this test function
    # are taken from the parameters specified in pytest.mark.parametrize
    # see https://docs.pytest.org/en/latest/parametrize.html
    """
    # step_config and variables
    step_config = {
        "name": step_name,
        "nullify_pvs": nullify_pvs,
        "ps_table": ps_table
    }
    run_id = 'store_survey_data_test'
    folder = '/store_survey_data_with_step_results'
    # steps that produce a run-keyed summary (ps) table; others skip the
    # ps_table set-up and assertions below
    applicable_ps_tables = [
        "SHIFT_WEIGHT", "NON_RESPONSE", "MINIMUMS_WEIGHT", "TRAFFIC_WEIGHT",
        "UNSAMPLED_WEIGHT", "IMBALANCE_WEIGHT", "FINAL_WEIGHT"
    ]

    # Cleanse and delete test inputs
    db.delete_from_table(idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', run_id)
    db.delete_from_table(step_config['ps_table'], 'RUN_ID', '=', run_id)

    # Set up records in SURVEY_SUBSAMPLE with above run_id
    survey_subsample_input = pd.read_csv(TEST_DATA_DIR + folder + prefix +
                                         'survey_subsample_test_input.csv',
                                         dtype=object)
    db.insert_dataframe_into_table(idm.SURVEY_SUBSAMPLE_TABLE,
                                   survey_subsample_input,
                                   database_connection, fast=False)

    # Set up records in SAS_SURVEY_SUBSAMPLE with above run_id
    sas_survey_subsample_input = pd.read_csv(TEST_DATA_DIR + folder + prefix +
                                             'sss_test_input.csv',
                                             dtype=object)
    db.insert_dataframe_into_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE,
                                   sas_survey_subsample_input,
                                   database_connection, fast=False)

    # Set up records in ps_table with above run_id
    if step_name in applicable_ps_tables:
        ps_shift_data_input = pd.read_csv(TEST_DATA_DIR + folder + prefix +
                                          'summary_table_test_input.csv',
                                          dtype=object)
        db.insert_dataframe_into_table(step_config['ps_table'],
                                       ps_shift_data_input,
                                       database_connection, fast=False)

    # Run function
    idm.store_survey_data_with_step_results(run_id, database_connection,
                                            step_config)

    # Assert tables were cleansed by function
    if step_name in applicable_ps_tables:
        sql = """
        SELECT * FROM {}
        WHERE RUN_ID = '{}'""".format(step_config['ps_table'], run_id)
        cur = database_connection.cursor()
        result = cur.execute(sql).fetchone()
        assert result is None

    result = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    assert len(result) == 0

    # Retrieve results produced by function
    sql = """
    SELECT * FROM {}
    WHERE RUN_ID = '{}'
    """.format(idm.SURVEY_SUBSAMPLE_TABLE, run_id)
    results = pd.read_sql(sql, database_connection)

    # round-trip through csv to normalise dtypes before comparing
    results.to_csv(TEST_DATA_DIR + folder + prefix + 'actual_results.csv',
                   index=False)

    # Get and format results
    results = pd.read_csv(TEST_DATA_DIR + folder + prefix + 'actual_results.csv',
                          dtype=object)
    test_results = pd.read_csv(TEST_DATA_DIR + folder + prefix + 'expected_result.csv',
                               dtype=object)

    # sort both frames by SERIAL and re-index so row order cannot cause a
    # spurious mismatch
    results.sort_values(by=["SERIAL"], inplace=True)
    results.index = range(0, len(results))
    test_results.sort_values(by=["SERIAL"], inplace=True)
    test_results.index = range(0, len(test_results))

    assert_frame_equal(results, test_results, check_dtype=False)
def traffic_weight_step(run_id):
    """
    Author  : Thomas Mahoney / Elinor Thorne
    Date    : 30 April 2018 / 2 October 2018
    Purpose : Orchestrates the traffic weight stage of the IPS process.
    Params  : run_id - identifier of the current run.
    Returns : NA
    """
    # Step configuration for the traffic-weight stage
    step_cfg = ServicesConfiguration().get_traffic_weight()

    # Stage survey and traffic data for this step
    idm.populate_survey_data_for_step(run_id, step_cfg)
    idm.populate_step_data(run_id, step_cfg)

    # Apply the step's process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_cfg)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_TRAFFIC_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_cfg)

    # Apply the step's process variables to the traffic data
    idm.copy_step_pvs_for_step_data(run_id, step_cfg)
    process_variables.process(dataset='traffic',
                              in_table_name='SAS_TRAFFIC_DATA',
                              out_table_name='SAS_TRAFFIC_PV',
                              in_id='REC_ID')
    idm.update_step_data_with_step_pv_output(step_cfg)

    # Pull the staged data back out of the database
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    df_traffic = db.get_table_values(step_cfg["data_table"])

    # Calculate the traffic weight (delegates to the R-based GES routine)
    df_output, df_summary = do_ips_trafweight_calculation_with_R(df_survey, df_traffic)

    # Persist the calculation outputs
    db.insert_dataframe_into_table(step_cfg["temp_table"], df_output)
    db.insert_dataframe_into_table(step_cfg["sas_ps_table"], df_summary)

    # Merge results into the survey data, store it, and store the summary
    idm.update_survey_data_with_step_results(step_cfg)
    idm.store_survey_data_with_step_results(run_id, step_cfg)
    idm.store_step_summary(run_id, step_cfg)
def test_update_survey_data_with_step_results(step_name, temp_table, results_columns, prefix, database_connection): """ # This test is parameterised. The values for the arguments of this test function # are taken from the parameters specified in pytest.mark.parametrize # see https://docs.pytest.org/en/latest/parametrize.html """ # step_config and variables step_config = { "name": step_name, "temp_table": temp_table, "results_columns": results_columns } folder = '/update_survey_data_with_step_results' # Cleanse and set up test data/tables db.delete_from_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE) sas_survey_subsample_input = pd.read_csv( TEST_DATA_DIR + folder + prefix + 'sas_survey_subsample_test_input.csv', dtype=object) db.insert_dataframe_into_table(idm.SAS_SURVEY_SUBSAMPLE_TABLE, sas_survey_subsample_input, database_connection, fast=False) db.delete_from_table(step_config["temp_table"]) sas_shift_wt_input = pd.read_csv(TEST_DATA_DIR + folder + prefix + 'temp_table_test_input.csv', dtype=object) db.insert_dataframe_into_table(step_config["temp_table"], sas_shift_wt_input, database_connection, fast=False) # Run function idm.update_survey_data_with_step_results(database_connection, step_config) # Get and format results results = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE) results.to_csv(TEST_DATA_DIR + folder + prefix + 'actual_results.csv', index=False) results = pd.read_csv(TEST_DATA_DIR + folder + prefix + 'actual_results.csv', dtype=object) test_results = pd.read_csv(TEST_DATA_DIR + folder + prefix + 'expected_results.csv', dtype=object) results.sort_values(by=["SERIAL"], inplace=True) results.index = range(0, len(results)) test_results.sort_values(by=["SERIAL"], inplace=True) test_results.index = range(0, len(test_results)) assert_frame_equal(results, test_results, check_dtype=False) # Assert temp tables had been cleansed in function result = db.get_table_values(step_config['temp_table']) assert len(result) == 0
def do_ips_trafweight_calculation_with_R(survey_data, trtotals):
    """
    Runs the traffic weight calculation via the R GES script and builds the
    accompanying summary table.

    Returns a tuple of (weights rounded to 3dp, summary dataframe); both are
    also written to the output SQL tables.
    NOTE: adds the traffic design weight column to *survey_data* in place.
    """
    # Reset the auxiliary staging table and the in-memory tables the R
    # script reads/writes
    db.delete_from_table(SURVEY_TRAFFIC_AUX_TABLE)
    db.clear_memory_table(R_TRAFFIC_TABLE)
    db.clear_memory_table(POP_PROWVEC_TABLE)

    # Stage the survey rows (into SURVEY_TRAFFIC_AUX_TABLE) and the
    # population totals (into POP_PROWVEC_TABLE) for the R script
    r_survey_input(survey_data)
    r_population_input(survey_data, trtotals)

    run_r_ges_script()

    # Collect the weights the R script wrote back, ordered by serial number
    r_output = db.get_table_values(R_TRAFFIC_TABLE)
    weights = r_output[[SERIAL, TRAFFIC_WT]]
    weights = weights.sort_values(SERIAL).reset_index(drop=True)

    # Keep an unrounded copy for the summary generation, then round to 3dp
    weights_unrounded = weights.copy()
    weights[TRAFFIC_WT] = weights[TRAFFIC_WT].apply(lambda x: round(x, 3))

    # -----------------------------------------------------------------
    # Generate the summary table
    # -----------------------------------------------------------------

    # Design weight = shift wt * non-response wt * minimums wt
    survey_data[TRAFFIC_DESIGN_WEIGHT_COLUMN] = (
        survey_data[var_shiftWeight]
        * survey_data[var_NRWeight]
        * survey_data[var_minWeight])

    # Summarise the population totals over the strata
    sorted_totals = trtotals.sort_values(STRATA)
    sorted_totals.index = range(sorted_totals.shape[0])
    df_popTotals = sorted_totals.groupby(STRATA)[TRAFFIC_TOTAL_COLUMN] \
        .agg([(TRAFFIC_TOTAL_COLUMN, 'sum')]) \
        .reset_index()

    # generate_ips_tw_summary must be given the unrounded weights
    df_summary_merge_sum_traftot = generate_ips_tw_summary(
        survey_data, weights_unrounded, var_serialNum, GWeightVar,
        df_popTotals, minCountThresh)

    # Persist both outputs
    db.insert_dataframe_into_table(OUTPUT_TABLE_NAME, weights)
    db.insert_dataframe_into_table(SUMMARY_TABLE_NAME,
                                   df_summary_merge_sum_traftot)

    return weights, df_summary_merge_sum_traftot
def convert_dataframe_to_sql_format(table_name, dataframe):
    """Persist *dataframe* into the SQL table *table_name*, then read the
    table back and return its contents (i.e. with DB-applied types)."""
    db.insert_dataframe_into_table(table_name, dataframe)
    stored = db.get_table_values(table_name)
    return stored
def r_population_input(df_survey_input, df_tr_totals):
    """
    Author       : David Powell / edits by Nassir Mohammad
    Date         : 07/06/2018
    Purpose      : Creates population data that feeds into the R GES weighting
    Parameters   : df_survey_input - A data frame containing the survey data for
                   processing month
                   df_tr_totals - A data frame containing population information
                   for processing year
    Returns      : A data frame containing the information needed for GES
                   weighting
    Requirements : NA
    Dependencies : NA
    """

    sort1 = [SAMP_PORT_GRP_PV, ARRIVEDEPART]

    # Sort and cleanse the survey input: drop rows with a null port group
    # or a null arrival/departure flag
    df_survey_input_sorted = df_survey_input.sort_values(sort1)
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[SAMP_PORT_GRP_PV].isnull()]
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[ARRIVEDEPART].isnull()]

    # Sort and cleanse the population totals the same way
    df_pop_totals = df_tr_totals.sort_values(sort1)
    df_pop_totals = df_pop_totals[~df_pop_totals[SAMP_PORT_GRP_PV].isnull()]
    df_pop_totals = df_pop_totals[~df_pop_totals[ARRIVEDEPART].isnull()]

    # Keep only population totals whose port group appears in the survey
    # data.  dict.fromkeys de-duplicates while preserving first-seen order
    # (same result as the previous O(n^2) manual loop, but linear time).
    items = df_survey_input_sorted[SAMP_PORT_GRP_PV].tolist()
    unique = list(dict.fromkeys(items))
    df_pop_totals_match = df_pop_totals[df_pop_totals[SAMP_PORT_GRP_PV].isin(
        unique)]

    # Create traffic totals per (port group, arrive/depart) stratum
    df_pop_totals_match = df_pop_totals_match.sort_values(
        [ARRIVEDEPART, SAMP_PORT_GRP_PV])
    df_traffic_totals = df_pop_totals_match.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            TRAFFIC_TOTAL_COLUMN: 'sum'
        }).reset_index()

    # Create lookup: one row per stratum, numbered 1..n in the T1 column
    lookup_dataframe = df_survey_input_sorted.copy()
    lookup_dataframe["count"] = ""
    lookup_dataframe = lookup_dataframe.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            "count": 'count'
        }).reset_index()
    lookup_dataframe[T1] = range(1, len(lookup_dataframe) + 1)

    # Merge the T1 stratum index onto the traffic totals.  The explicit
    # .copy() avoids assigning into a slice view of lookup_dataframe
    # (pandas SettingWithCopyWarning / potentially lost write).
    lookup_dataframe_aux = lookup_dataframe[[
        SAMP_PORT_GRP_PV, ARRIVEDEPART, T1
    ]].copy()
    lookup_dataframe_aux[T1] = lookup_dataframe_aux.T1.astype(np.int64)

    df_mod_totals = pd.merge(df_traffic_totals,
                             lookup_dataframe_aux,
                             on=[SAMP_PORT_GRP_PV, ARRIVEDEPART],
                             how='left')
    df_mod_totals[MODEL_GROUP] = 1
    df_mod_totals = df_mod_totals.drop(
        columns=[ARRIVEDEPART, SAMP_PORT_GRP_PV])

    # Pivot to a single row: one T_<n> column per stratum total, with the
    # model group column placed first
    df_mod_pop_totals = df_mod_totals.pivot_table(index=MODEL_GROUP,
                                                  columns=T1,
                                                  values=TRAFFIC_TOTAL_COLUMN)
    df_mod_pop_totals = df_mod_pop_totals.add_prefix('T_')
    df_mod_pop_totals[MODEL_GROUP] = 1
    cols = [MODEL_GROUP
            ] + [col for col in df_mod_pop_totals if col != MODEL_GROUP]
    df_mod_pop_totals = df_mod_pop_totals[cols]
    df_mod_pop_totals = df_mod_pop_totals.reset_index(drop=True)

    # Recreate the prowvec table for the R script.  to_sql writes the
    # dataframe index as a column, so it is dropped when re-read from SQL.
    con = db.get_sql_connection()
    df_mod_pop_totals.to_sql(POP_PROWVEC_TABLE, con, if_exists='replace')
    df_mod_pop_totals_import = db.get_table_values(POP_PROWVEC_TABLE)
    df_mod_pop_totals_import = df_mod_pop_totals_import.drop('index', axis=1)

    return df_mod_pop_totals_import
def shift_weight_step(run_id):
    """
    Author       : Thomas Mahoney / Elinor Thorne
    Date         : 26 April 2018 / 2 October 2018
    Purpose      : Runs the shift weight steps of the ips process
    Params       : run_id - the id for the current run.
    Returns      : NA
    """

    # Fetch the step configuration for shift weighting
    step_config = ServicesConfiguration().get_shift_weight()

    # Stage the survey data and the shift data for this step
    idm.populate_survey_data_for_step(run_id, step_config)
    idm.populate_step_data(run_id, step_config)

    # Apply the shift weight process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_config)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_SHIFT_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_config)

    # Apply the shift weight process variables to the shift data
    idm.copy_step_pvs_for_step_data(run_id, step_config)
    process_variables.process(dataset='shift',
                              in_table_name='SAS_SHIFT_DATA',
                              out_table_name='SAS_SHIFT_PV',
                              in_id='REC_ID')
    idm.update_step_data_with_step_pv_output(step_config)

    # Pull the prepared data back out of SQL
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    df_shift = db.get_table_values(step_config["data_table"])

    # Run the shift weight calculation
    df_output, df_summary = \
        calculate_shift_weight.do_ips_shift_weight_calculation(df_survey,
                                                               df_shift,
                                                               serial_number='SERIAL',
                                                               shift_weight='SHIFT_WT')

    # Persist the calculation results
    db.insert_dataframe_into_table(step_config["temp_table"], df_output)
    db.insert_dataframe_into_table(step_config["sas_ps_table"], df_summary)

    # Fold the results back into the survey data and store everything
    idm.update_survey_data_with_step_results(step_config)
    idm.store_survey_data_with_step_results(run_id, step_config)
    idm.store_step_summary(run_id, step_config)
def unsampled_weight_step(run_id):
    """
    Author       : Thomas Mahoney / Elinor Thorne
    Date         : 30 April 2018 / 2 October 2018
    Purpose      : Runs the unsampled weight steps of the ips process
    Params       : run_id - the id for the current run.
    Returns      : None
    """

    # Fetch the step configuration for unsampled weighting
    step_config = ServicesConfiguration().get_unsampled_weight()

    # Stage the survey data and the unsampled data for this step
    idm.populate_survey_data_for_step(run_id, step_config)
    idm.populate_step_data(run_id, step_config)

    # Apply the unsampled weight process variables to the survey data
    idm.copy_step_pvs_for_survey_data(run_id, step_config)
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_UNSAMPLED_OOH_SPV',
                              in_id='serial')
    idm.update_survey_data_with_step_pv_output(step_config)

    # Apply the unsampled weight process variables to the unsampled data
    idm.copy_step_pvs_for_step_data(run_id, step_config)
    process_variables.process(dataset='unsampled',
                              in_table_name='SAS_UNSAMPLED_OOH_DATA',
                              out_table_name='SAS_UNSAMPLED_OOH_PV',
                              in_id='REC_ID')
    idm.update_step_data_with_step_pv_output(step_config)

    # Pull the prepared data back out of SQL
    df_survey = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    df_unsampled = db.get_table_values(step_config["data_table"])

    # Run the unsampled weight calculation
    df_output, df_summary = calculate_unsampled_weight.do_ips_unsampled_weight_calculation(
        df_surveydata=df_survey,
        serial_num='SERIAL',
        shift_weight='SHIFT_WT',
        nr_weight='NON_RESPONSE_WT',
        min_weight='MINS_WT',
        traffic_weight='TRAFFIC_WT',
        out_of_hours_weight="UNSAMP_TRAFFIC_WT",
        df_ustotals=df_unsampled,
        min_count_threshold=30)

    # Persist the calculation results
    db.insert_dataframe_into_table(step_config["temp_table"], df_output)
    db.insert_dataframe_into_table(step_config["sas_ps_table"], df_summary)

    # Fold the results back into the survey data and store everything
    idm.update_survey_data_with_step_results(step_config)
    idm.store_survey_data_with_step_results(run_id, step_config)
    idm.store_step_summary(run_id, step_config)
def test_populate_survey_data(name, delete_tables, nullify_pvs, database_connection):
    """
    Parameterised test: the argument values are supplied by the
    pytest.mark.parametrize decorator on this function.
    """
    run_id = '9e5c1872-3f8e-4ae5-85dc-c67a602d011e'

    # Remove any leftover survey data for the test run id, then seed it
    db.delete_from_table(SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', run_id)
    test_data = pd.read_csv(TEST_DATA_DIR + "populate_survey_data/survey_subsample.csv",
                            dtype=object)
    db.insert_dataframe_into_table(idm.SURVEY_SUBSAMPLE_TABLE, test_data)

    step_config = {'nullify_pvs': nullify_pvs,
                   'name': name,
                   'delete_tables': delete_tables}

    # Exercise the function under test
    idm.populate_survey_data_for_step(run_id=run_id,
                                      conn=database_connection,
                                      step_configuration=step_config)

    # Round-trip the result through csv to normalise data types
    test_result = db.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    test_result.to_csv(TEST_DATA_DIR + "populate_survey_data/test_result.csv",
                       index=False)

    # The expected result differs when the traffic or unsampled weight
    # step is under test
    if name in ('TRAFFIC_WEIGHT', 'UNSAMPLED_WEIGHT'):
        expected_result = pd.read_csv(
            TEST_DATA_DIR + "populate_survey_data/populate_result_traffic_unsampled.csv")
    else:
        expected_result = pd.read_csv(
            TEST_DATA_DIR + "populate_survey_data/populate_result.csv")
    test_result = pd.read_csv(TEST_DATA_DIR + "populate_survey_data/test_result.csv")

    # Align row order and indexes so corresponding rows are compared
    expected_result = expected_result.sort_values(by='SERIAL')
    test_result = test_result.sort_values(by='SERIAL')
    expected_result.index = range(0, len(expected_result))
    test_result.index = range(0, len(test_result))

    # Every table listed for deletion should now be empty
    for table in step_config['delete_tables']:
        assert db.get_table_values(table).empty

    # Every nullified PV column should now be entirely NULL
    for column in step_config['nullify_pvs']:
        column_name = column.replace('[', '').replace(']', '')
        result = db.select_data(column_name, idm.SURVEY_SUBSAMPLE_TABLE,
                                'RUN_ID', run_id)
        assert result[column_name].isnull().sum() == len(result)

    # The populated survey data should match the expected data
    assert_frame_equal(expected_result, test_result,
                       check_dtype=False, check_like=True)
def test_populate_step_data(table_name, data_table, insert_to_populate, step_data, sas_step_data, result_data, database_connection):
    """
    Parameterised test: the argument values are supplied by the
    pytest.mark.parametrize decorator on this function.
    """
    run_id = '9e5c1872-3f8e-4ae5-85dc-c67a602d011e'

    step_config = {"table_name": table_name,
                   "data_table": data_table,
                   "insert_to_populate": insert_to_populate}

    # Clear old test records, then seed the step table with test data
    db.delete_from_table(step_config['table_name'], 'RUN_ID', '=',
                         '9e5c1872-3f8e-4ae5-85dc-c67a602d011e')
    test_data = pd.read_csv(TEST_DATA_DIR + "populate_step_data/" + step_data,
                            dtype=object)
    db.insert_dataframe_into_table(step_config["table_name"], test_data)

    # Exercise the function under test (deletes old data from the sas
    # table and repopulates it with the seeded data)
    idm.populate_step_data(run_id, database_connection, step_config)

    # Round-trip the result through csv to normalise data types
    test_result = db.get_table_values(step_config['data_table'])
    test_result.to_csv(TEST_DATA_DIR + "populate_step_data/" + result_data,
                       index=False)
    expected_result = pd.read_csv(TEST_DATA_DIR + "populate_step_data/" + sas_step_data)
    test_result = pd.read_csv(TEST_DATA_DIR + "populate_step_data/" + result_data)

    # The expected result contains no rec_id, so blank it on both sides
    expected_result['REC_ID'] = ''
    test_result['REC_ID'] = ''

    # Sort both frames by the natural key of the table under test
    sort_keys = {
        'SHIFT_DATA': ['PORTROUTE', 'WEEKDAY'],
        'NON_RESPONSE_DATA': ['PORTROUTE', 'WEEKDAY', 'ARRIVEDEPART',
                              'AM_PM_NIGHT', 'SAMPINTERVAL', 'MIGTOTAL',
                              'ORDTOTAL'],
        'UNSAMPLED_OOH_DATA': ['PORTROUTE', 'REGION', 'ARRIVEDEPART',
                               'UNSAMP_TOTAL'],
        'TRAFFIC_DATA': ['PORTROUTE', 'ARRIVEDEPART', 'TRAFFICTOTAL', 'HAUL'],
    }
    if table_name in sort_keys:
        expected_result = expected_result.sort_values(by=sort_keys[table_name])
        test_result = test_result.sort_values(by=sort_keys[table_name])

    # Reset the indexes so corresponding rows are compared
    expected_result.index = range(0, len(expected_result))
    test_result.index = range(0, len(test_result))

    assert_frame_equal(expected_result, test_result,
                       check_dtype=False, check_like=True)