def main():
    """Post-process a predictions file and write out the final values.

    Ten positional command-line arguments are read from ``sys.argv``:

        1. result_path       -- handed to ``write_results`` with the final values
        2. result_file_path  -- predictions file, loaded with ``np.loadtxt``
        3. test_data_path    -- comma-separated file, loaded with ``pd.read_csv``
        4. norm_path         -- read by ``nzr.read_normalization_config``
        5. is_normalised     -- the exact string 'True' enables de-normalisation
        6. is_differenced    -- the exact string 'True' enables ``de_difference``
        7. is_proportional   -- the exact string 'True' enables ``de_prop_difference``
        8. apply_round       -- forwarded unchanged (still a string) to
                                ``de_prop_difference``
        9. target_col        -- forwarded to the de-differencing helpers
       10. ref_col           -- forwarded to the de-differencing helpers

    Any arguments beyond the tenth are ignored.  When fewer than ten are
    supplied, an error and the usage text are printed and the process exits
    with status 1.
    """
    # sys.argv[0] is the program name, so ten arguments need 11 entries.
    # The previous bound of 10 let a nine-argument call through, which then
    # failed with IndexError on sys.argv[10] instead of showing the usage.
    if len(sys.argv) < 11:
        print("ERROR: MISSING ARGUMENTS")
        print_usage(sys.argv)
        sys.exit(1)

    (result_path, result_file_path, test_data_path, norm_path,
     is_normalised, is_differenced, is_proportional, apply_round,
     target_col, ref_col) = sys.argv[1:11]

    test_data = pd.read_csv(test_data_path, sep=",")
    test_preds = np.loadtxt(result_file_path)
    # The config is read unconditionally, even when de-normalisation is off.
    nzr_config = nzr.read_normalization_config(norm_path)

    final_preds = test_preds

    # The flags are compared as strings: only the exact text 'True' enables
    # a step; any other value leaves the predictions untouched by that step.
    if is_normalised == 'True':
        final_preds = nzr.de_normalize_all(test_preds, nzr_config)

    if is_differenced == 'True':
        final_preds = de_difference(test_data, final_preds, ref_col, target_col)

    if is_proportional == 'True':
        final_preds = de_prop_difference(
            test_data, final_preds, ref_col, target_col, apply_round)

    write_results(final_preds, result_path)
예제 #2
0
def generate_files_for_station(stat, index_column, cut_off_date,
                               forecast_period, list_of_lags):
    """Generate and write the train/test data sets for one station.

    Features are produced by ``dg.generate_time_dependent_features`` from the
    module-level ``df`` for the column ``STN_<stat>_PM10``.  Rows whose
    ``Date`` is before ``cut_off_date`` form the training set; rows on or
    after it form the test set.  Everything is written into the folder
    ``STN_<stat>``:

    * ``train.csv`` / ``test.csv`` -- un-normalised, with a header row
    * ``nzr_config.yaml`` -- field config written for the VALUE target
    * ``train_normalised.csv`` / ``test_normalised.csv`` -- normalised,
      space-separated, without a header row

    Raises ``ValueError`` (from ``list.remove``) when one of the columns that
    is expected in the generated frame is absent.
    """
    station = str(stat)
    folder = 'STN_' + station
    ensure_dir(folder)

    # The station's own PM10 column is the forecast column and is also kept
    # as a feature further down.
    val_name = 'STN_' + station + '_PM10'
    new_df = dg.generate_time_dependent_features(
        df, index_column, val_name, forecast_period, list_of_lags)

    before_cut_off = new_df['Date'] < cut_off_date
    from_cut_off = new_df['Date'] >= cut_off_date
    train_df = new_df[before_cut_off]
    test_df = new_df[from_cut_off]

    # NOTE(review): the '7' in the target names is fixed here rather than
    # derived from forecast_period -- confirm the two always agree.
    target_prefix = 'TARGET_' + val_name + '_7_'
    value_target = target_prefix + 'VALUE'
    diff_target = target_prefix + 'DIFF'
    prop_diff_target = target_prefix + 'PROP_DIFF'

    # Take these columns out of the feature list, then put the station value
    # and the VALUE target back so that they end up as the last two columns.
    # The DIFF and PROP_DIFF targets stay out.
    features = train_df.columns.tolist()
    for column in ('No', 'Date', val_name,
                   value_target, diff_target, prop_diff_target):
        features.remove(column)
    features += [val_name, value_target]

    train_df2 = train_df.loc[:, features]
    test_df2 = test_df.loc[:, features]

    # Un-normalised copies: default separator, header row included.
    plain_csv = dict(encoding='utf-8', index=False, header=True)
    for split_name, frame in (('train', train_df2), ('test', test_df2)):
        frame.to_csv('%s/%s.csv' % (folder, split_name), **plain_csv)

    # The normalisation config comes from the training split only and is
    # then applied to both splits.
    config = nzr.create_normalization_config(train_df2)
    nzr.write_field_config(config, value_target, folder + '/nzr_config.yaml')

    train_df_norm = nzr.normalize(train_df2, config, [])
    test_df_norm = nzr.normalize(test_df2, config, [])

    # Normalised copies: space-separated and without a header row.
    normalised_csv = dict(sep=' ', encoding='utf-8', index=False, header=False)
    for split_name, frame in (('train', train_df_norm),
                              ('test', test_df_norm)):
        frame.to_csv('%s/%s_normalised.csv' % (folder, split_name),
                     **normalised_csv)
               encoding='utf-8',
               index=False,
               header=True)

# ###########################################################################################################
#  CREATE A NORMALISATION CONFIGURATION FROM THE TRAINING SET
# ###########################################################################################################

# Every training column is kept except the ones named in `unwanted`;
# list.remove raises ValueError if one of them is not present.
unwanted = ["No", "year", "month", "day"]
features = train_df.columns.tolist()
for x in unwanted:
    features.remove(x)

# Both splits get the same column selection.
train_df2, test_df2 = train_df.loc[:, features], test_df.loc[:, features]

# The configuration is built from the training split only, with 0.05 passed
# as the second argument.
config = nzr.create_padded_normalization_config(train_df2, 0.05)

# ###########################################################################################################
#
#  GENERATE 3 DIFFERENT TRAINING AND TESTING SETS
#
# ###########################################################################################################

# ###########################################################################################################
#  RAW TARGET NORMALISED
# ###########################################################################################################
# Candidate columns for this variant: all columns of the training frame.
features = train_df.columns.tolist()
# Columns named for exclusion, including the DIFF and PROP_DIFF forms of the
# pm2.5 target (presumably so that only the raw target remains -- confirm).
# NOTE(review): no removal loop follows this list here; both `features` and
# `unwanted` are reassigned further down before either is read.
unwanted = [
    "No", "year", "month", "day", "TARGET_pm2.5_24_DIFF",
    "TARGET_pm2.5_24_PROP_DIFF"
]
#  REMOVE UNWANTED COLUMNS, NORMALISE AND WRITE TO DISK
#  -- WE REMOVE THE DIFF VERSION OF THE TARGET
#     AS IN THIS PROBLEM DATA IS GENERALLY STATIONARY (IT DOES NOT EXHIBIT OVERALL TREND)
# ###########################################################################################################
target_col = "TARGET_STN_144_PM10_7_VALUE"
unwanted = ['No', 'Date', 'TARGET_STN_144_PM10_7_DIFF']

# Keep every training column that is not named in `unwanted`; list.remove
# raises ValueError if one of them is not present.
features = train_df.columns.tolist()
for x in unwanted:
    features.remove(x)

# Both splits get the same column selection.
train_df2, test_df2 = train_df.loc[:, features], test_df.loc[:, features]

# The configuration is derived from the training split only; the field
# config for the target column is written out before normalising.
config = nzr.create_normalization_config(train_df2)
nzr.write_field_config(
    config, target_col, 'Delhi_Station_144__other_stns_nzr_config.yaml')

train_df_norm = nzr.normalize(train_df2, config, [])
test_df_norm = nzr.normalize(test_df2, config, [])

# Space-separated output without a header row or index column.
train_df_norm.to_csv(
    'Station_144_others_Train_normalised.csv',
    sep=' ', encoding='utf-8', index=False, header=False)
test_df_norm.to_csv('Station_144_others_Test_normalised.csv',
                    sep=' ',
                    encoding='utf-8',
예제 #5
0
               encoding='utf-8',
               index=False,
               header=True)

# ###########################################################################################################
#  REMOVE UNWANTED COLUMNS, NORMALISE AND WRITE TO DISK
# ###########################################################################################################
# Keep every training column that is not named in `unwanted`; list.remove
# raises ValueError if the column is not present.
unwanted = ["No"]
features = train_df.columns.tolist()
for x in unwanted:
    features.remove(x)

# Both splits get the same column selection.
train_df2, test_df2 = train_df.loc[:, features], test_df.loc[:, features]

# The configuration is derived from the training split only.  It is saved in
# full, and a separate field config is saved for the target column.
config = nzr.create_normalization_config(train_df2)
nzr.write_normalization_config(config, 'results/Normalisation.yaml')
nzr.write_field_config(
    config, target_column_name, 'results/Target_nzr_config.yaml')

# 'Week' and 'Day' are passed as the third argument of normalize
# (presumably columns to leave un-normalised -- confirm against nzr).
train_df_norm = nzr.normalize(train_df2, config, ['Week', 'Day'])
test_df_norm = nzr.normalize(test_df2, config, ['Week', 'Day'])

# Comma-separated output with a header row and no index column.
train_df_norm.to_csv(
    'results/Train_set_normalised.csv',
    sep=',', encoding='utf-8', index=False, header=True)
test_df_norm.to_csv('results/Test_set_normalised.csv',
                    sep=',',
# ###########################################################################################################
# SPLIT IT INTO TRAIN AND TEST
# ###########################################################################################################
trainset = 30

# .loc slices by label and includes both end points: labels 0..trainset go
# to the training split, labels from trainset + 1 onwards to the test split.
train_df = new_df.loc[0:trainset, :]
test_df = new_df.loc[trainset + 1:, :]

# Every CSV written below shares these settings: comma-separated, UTF-8,
# header row included, index column omitted.
csv_options = dict(sep=',', encoding='utf-8', index=False, header=True)

# ###########################################################################################################
# FULL, UN-NORMALISED COPIES OF BOTH SPLITS
# ###########################################################################################################
train_df.to_csv('results/Train_set_full.csv', **csv_options)
test_df.to_csv('results/Test_set_full.csv', **csv_options)

# ###########################################################################################################
# DROP UNWANTED COLUMNS, NORMALISE, AND WRITE THE NORMALISED COPIES
# ###########################################################################################################
unwanted = ["No"]
features = train_df.columns.tolist()
for x in unwanted:
    features.remove(x)

train_df2, test_df2 = train_df.loc[:, features], test_df.loc[:, features]

# The configuration is derived from the training split only.  'Week' and
# 'Day' are passed as the third argument of normalize (presumably columns to
# leave un-normalised -- confirm against nzr).
config = nzr.create_normalization_config(train_df2)
train_df_norm = nzr.normalize(train_df2, config, ['Week', 'Day'])
test_df_norm = nzr.normalize(test_df2, config, ['Week', 'Day'])

train_df_norm.to_csv('results/Train_set_normalised.csv', **csv_options)
test_df_norm.to_csv('results/Test_set_normalised.csv', **csv_options)