def main():
    """Post-process raw model predictions and write the result to disk.

    Ten positional command-line arguments are required
    (``sys.argv[1]`` through ``sys.argv[10]``):

        1.  result_path       -- destination handed to ``write_results``
        2.  result_file_path  -- raw predictions, loaded with ``np.loadtxt``
        3.  test_data_path    -- comma-separated test data, loaded with ``pd.read_csv``
        4.  norm_path         -- file passed to ``nzr.read_normalization_config``
        5.  is_normalised     -- the literal string 'True' enables de-normalisation
        6.  is_differenced    -- 'True' enables ``de_difference``
        7.  is_proportional   -- 'True' enables ``de_prop_difference``
        8.  apply_round       -- forwarded as-is (still a string) to ``de_prop_difference``
        9.  target_col        -- forwarded to the de-differencing helpers
        10. ref_col           -- forwarded to the de-differencing helpers

    The enabled transformations are applied in the order listed (5, 6, 7),
    each one operating on the output of the previous one.

    Exits with status 1, after printing an error and the usage text, when
    fewer than ten arguments are supplied.
    """
    # sys.argv[0] is the script name, so ten user arguments mean a length of 11.
    # The previous check (< 10) let a nine-argument call through and then failed
    # with IndexError on sys.argv[10] instead of showing the usage message.
    required_args = 10
    if len(sys.argv) < required_args + 1:
        print("ERROR: MISSING ARGUMENTS")
        print_usage(sys.argv)
        # sys.exit is always available; the bare `exit` helper is only injected
        # by the `site` module.
        sys.exit(1)

    result_path = sys.argv[1]
    result_file_path = sys.argv[2]
    test_data_path = sys.argv[3]
    norm_path = sys.argv[4]
    is_normalised = sys.argv[5]
    is_differenced = sys.argv[6]
    is_proportional = sys.argv[7]
    apply_round = sys.argv[8]
    target_col = sys.argv[9]
    ref_col = sys.argv[10]

    test_data = pd.read_csv(test_data_path, sep=",")
    test_preds = np.loadtxt(result_file_path)
    # The config is read unconditionally, so norm_path must be readable even
    # when de-normalisation is switched off.
    nzr_config = nzr.read_normalization_config(norm_path)

    # Flags arrive as strings from the command line; only the exact text
    # 'True' switches a step on.
    final_preds = test_preds
    if is_normalised == 'True':
        final_preds = nzr.de_normalize_all(test_preds, nzr_config)
    if is_differenced == 'True':
        final_preds = de_difference(test_data, final_preds, ref_col, target_col)
    if is_proportional == 'True':
        final_preds = de_prop_difference(test_data, final_preds, ref_col, target_col, apply_round)

    write_results(final_preds, result_path)
def generate_files_for_station(stat, index_column, cut_off_date, forecast_period, list_of_lags):
    """Generate and write the train/test data sets for a single station.

    Everything is written into the folder ``STN_<stat>`` (created through
    ``ensure_dir``):

        train.csv / test.csv                        -- un-normalised, with header
        nzr_config.yaml                             -- field config for the VALUE target
        train_normalised.csv / test_normalised.csv  -- space separated, no header

    The source data is the module-level ``df``; it is not passed in.

    :param stat: station identifier, interpolated into folder and column names
    :param index_column: forwarded to ``dg.generate_time_dependent_features``
    :param cut_off_date: rows with ``Date`` before this go to training, the rest to testing
    :param forecast_period: forwarded to ``dg.generate_time_dependent_features``
    :param list_of_lags: forwarded to ``dg.generate_time_dependent_features``
    """
    station_id = str(stat)
    folder = 'STN_%s' % station_id
    ensure_dir(folder)

    # The station's PM10 column is both the column being forecast and a
    # feature that is kept in the output.
    val_name = 'STN_%s_PM10' % station_id
    new_df = dg.generate_time_dependent_features(df, index_column, val_name, forecast_period, list_of_lags)

    # NOTE(review): the split always uses the 'Date' column, regardless of index_column.
    before_cut_off = new_df['Date'] < cut_off_date
    from_cut_off = new_df['Date'] >= cut_off_date
    train_df = new_df[before_cut_off]
    test_df = new_df[from_cut_off]

    # ###########################################################################################################
    #  COLUMN SELECTION
    #  -- 'No', 'Date' AND THE DIFF / PROP_DIFF TARGETS ARE DROPPED; THE ORIGINAL AUTHOR'S REASONING IS THAT
    #     THE DATA IS GENERALLY STATIONARY (NO OVERALL TREND)
    #  -- THE STATION VALUE AND THE VALUE TARGET ARE TAKEN OUT AND RE-ADDED SO THEY BECOME THE LAST TWO COLUMNS
    # ###########################################################################################################
    # NOTE(review): the target names hard-code a period of 7; presumably they must match
    # what the feature generator emits for forecast_period -- confirm.
    targ1_name = 'TARGET_STN_%s_PM10_7_VALUE' % station_id
    targ2_name = 'TARGET_STN_%s_PM10_7_DIFF' % station_id
    targ3_name = 'TARGET_STN_%s_PM10_7_PROP_DIFF' % station_id

    features = train_df.columns.tolist()
    for dropped in ('No', 'Date', val_name, targ1_name, targ2_name, targ3_name):
        # list.remove raises ValueError when an expected column is absent
        features.remove(dropped)
    features += [val_name, targ1_name]

    train_df2 = train_df.loc[:, features]
    test_df2 = test_df.loc[:, features]

    # Un-normalised copies keep the header row.
    plain_csv = {'encoding': 'utf-8', 'index': False, 'header': True}
    train_df2.to_csv(folder + '/train.csv', **plain_csv)
    test_df2.to_csv(folder + '/test.csv', **plain_csv)

    # The normalisation config is derived from the training rows only and then
    # applied to both sets.
    config = nzr.create_normalization_config(train_df2)
    nzr.write_field_config(config, targ1_name, folder + '/nzr_config.yaml')
    train_df_norm = nzr.normalize(train_df2, config, [])
    test_df_norm = nzr.normalize(test_df2, config, [])

    # Normalised copies are space separated and carry no header.
    normalised_csv = {'sep': ' ', 'encoding': 'utf-8', 'index': False, 'header': False}
    train_df_norm.to_csv(folder + '/train_normalised.csv', **normalised_csv)
    test_df_norm.to_csv(folder + '/test_normalised.csv', **normalised_csv)
encoding='utf-8', index=False, header=True) # ########################################################################################################### # CREATE A NORMALISATION CONFIGURATION TO # ########################################################################################################### features = train_df.columns.tolist() unwanted = ["No", "year", "month", "day"] for x in unwanted: features.remove(x) train_df2 = train_df.loc[:, features] test_df2 = test_df.loc[:, features] config = nzr.create_padded_normalization_config(train_df2, 0.05) # ########################################################################################################### # # GENERATE 3 DIFFERENT TRAINING AND TESTING SETS # # ########################################################################################################### # ########################################################################################################### # RAW TARGET NORMALISED # ########################################################################################################### features = train_df.columns.tolist() unwanted = [ "No", "year", "month", "day", "TARGET_pm2.5_24_DIFF", "TARGET_pm2.5_24_PROP_DIFF" ]
# REMOVE UNWANTED COLUMNS, NORMALISE AND WRITE TO DISK # -- WE REMOVE THE DIFF VERSION OF THE TARGET # AS IN THIS PROBLEM DATA IS GENERALLY STATIONARY (IT DOES NOT EXHIBIT OVERALL TREND) # ########################################################################################################### features = train_df.columns.tolist() unwanted = ['No', 'Date', 'TARGET_STN_144_PM10_7_DIFF'] for x in unwanted: features.remove(x) train_df2 = train_df.loc[:, features] test_df2 = test_df.loc[:, features] target_col = "TARGET_STN_144_PM10_7_VALUE" config = nzr.create_normalization_config(train_df2) nzr.write_field_config(config, target_col, 'Delhi_Station_144__other_stns_nzr_config.yaml') train_df_norm = nzr.normalize(train_df2, config, []) test_df_norm = nzr.normalize(test_df2, config, []) train_df_norm.to_csv('Station_144_others_Train_normalised.csv', sep=' ', encoding='utf-8', index=False, header=False) test_df_norm.to_csv('Station_144_others_Test_normalised.csv', sep=' ', encoding='utf-8',
encoding='utf-8', index=False, header=True) # ########################################################################################################### # REMOVE UNWANTED COLUMNS, NORMALISE AND WRITE TO DISK # ########################################################################################################### features = train_df.columns.tolist() unwanted = ["No"] for x in unwanted: features.remove(x) train_df2 = train_df.loc[:, features] test_df2 = test_df.loc[:, features] config = nzr.create_normalization_config(train_df2) nzr.write_normalization_config(config, 'results/Normalisation.yaml') nzr.write_field_config(config, target_column_name, 'results/Target_nzr_config.yaml') train_df_norm = nzr.normalize(train_df2, config, ['Week', 'Day']) test_df_norm = nzr.normalize(test_df2, config, ['Week', 'Day']) train_df_norm.to_csv('results/Train_set_normalised.csv', sep=',', encoding='utf-8', index=False, header=True) test_df_norm.to_csv('results/Test_set_normalised.csv', sep=',',
# ###########################################################################################################
#  TRAIN / TEST SPLIT
#  -- .loc SLICES BY INDEX LABEL AND INCLUDES BOTH END POINTS: LABELS 0..trainset GO TO TRAINING,
#     LABELS FROM trainset+1 ONWARD GO TO TESTING
# ###########################################################################################################
trainset = 30
train_df = new_df.loc[0:trainset, :]
test_df = new_df.loc[trainset + 1:, :]

# ###########################################################################################################
#  FULL UN-NORMALISED COPIES
#  -- EVERY CSV IN THIS SECTION IS WRITTEN WITH THE SAME OPTIONS
# ###########################################################################################################
csv_options = {'sep': ',', 'encoding': 'utf-8', 'index': False, 'header': True}
train_df.to_csv('results/Train_set_full.csv', **csv_options)
test_df.to_csv('results/Test_set_full.csv', **csv_options)

# ###########################################################################################################
#  DROP UNWANTED COLUMNS, NORMALISE, WRITE
#  -- THE NORMALISATION CONFIG IS BUILT FROM THE TRAINING ROWS ONLY AND APPLIED TO BOTH SETS
#  -- ['Week', 'Day'] IS PASSED THROUGH TO nzr.normalize AS ITS THIRD ARGUMENT
# ###########################################################################################################
features = train_df.columns.tolist()
unwanted = ["No"]
for x in unwanted:
    # list.remove raises ValueError when the column is not present
    features.remove(x)

train_df2, test_df2 = (frame.loc[:, features] for frame in (train_df, test_df))
config = nzr.create_normalization_config(train_df2)
train_df_norm, test_df_norm = (
    nzr.normalize(frame, config, ['Week', 'Day']) for frame in (train_df2, test_df2)
)

train_df_norm.to_csv('results/Train_set_normalised.csv', **csv_options)
test_df_norm.to_csv('results/Test_set_normalised.csv', **csv_options)