def train_and_predict_AR(model, train_data_inputs, train_data_targets, test_data, tr_to_val_split=0.9, tr_verbose=False):
    """Fit the linear AR model, forecast past the training data and report errors.

    Args:
        model: AR model object; ``model.num_epochs`` and ``model.num_taps``
            are read here.
        train_data_inputs: training inputs, merged with ``concat_data``.
        train_data_targets: training targets, merged with ``concat_data``.
        test_data: reference cycle. When it is non-empty its last column is
            compared against the forecast; when empty no test error exists.
        tr_to_val_split: forwarded to ``train_armodel`` as ``tr_split``.
        tr_verbose: forwarded to ``train_armodel``.

    Returns:
        ``(predictions_ar, test_error, val_error, tr_error)`` where the
        training/validation errors are the last entries of the loss histories
        and ``test_error`` is ``np.nan`` when ``test_data`` is empty.
    """
    # Concatenate the rows that have columns with signal (not the timestamp)
    inputs_ct = concat_data(train_data_inputs)
    targets_ct = concat_data(train_data_targets)

    tr_losses, val_losses, model = train_armodel(
        model,
        nepochs=model.num_epochs,
        inputs=inputs_ct,
        targets=targets_ct,
        tr_split=tr_to_val_split,
        tr_verbose=tr_verbose,
    )

    has_reference = len(test_data) > 0
    # NOTE: without a reference cycle the number of future predictions is
    # heuristically set to 132
    horizon = len(test_data) if has_reference else 132

    # The forecast is always seeded with the last training input window
    predictions_ar = predict_armodel(model=model,
                                     eval_input=inputs_ct[-1],
                                     n_predict=horizon)
    if has_reference:
        test_error = mean_squared_error(y_true=test_data[:, -1],
                                        y_pred=predictions_ar)
    else:
        test_error = np.nan

    # Latest training / validation errors
    tr_error, val_error = tr_losses[-1], val_losses[-1]

    summary = "{} - {}, {} - {}, {} - {:.8f}, {} - {:.8f}, {}, - {:.8f}".format(
        "Model", "AR",
        "P", model.num_taps,
        "Training Error", tr_error,
        "Validation Error", val_error,
        "Test Error", test_error)
    print(summary)
    print("***********************************************************************************************************")

    return predictions_ar, test_error, val_error, tr_error
def train_model_ESN(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None):
    """Configure, train and evaluate the echo state network for one cycle.

    Args:
        options: configuration dict; ``options["esn"]["tau"]`` and
            ``options["esn"]["history_q"]`` are overwritten in place.
        model_type: forwarded to ``load_model_with_opts``.
        data: time series handed to ``get_msah_training_dataset``.
        minimum_idx: cycle boundaries handed to ``get_msah_training_dataset``.
        predict_cycle_num: cycle passed to ``get_cycle`` as ``icycle``. For
            the values 23 and 76 the ESN horizon is fixed to 132 instead of
            being derived from the length of the test cycle.
        tau: accepted for interface compatibility; the body always passes the
            literal ``tau=1`` to ``get_msah_training_dataset``.
        output_file: forwarded to ``save_pred_results``.

    Returns:
        ``(predictions, ytest)``.
    """
    # Dataset of inputs and targets (one tap, one step ahead)
    X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=1)
    xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

    fixed_horizon = predict_cycle_num in (23, 76)

    esn_opts = options["esn"]
    esn_opts["tau"] = 132 if fixed_horizon else len(ytest) - 1
    esn_opts["history_q"] = esn_opts["tau"] + 1
    model = load_model_with_opts(options, model_type)

    # Concat data
    xtrain_ct = concat_data(xtrain, col=-1)
    if not fixed_horizon:
        # The concatenated targets are not consumed below; the call is kept so
        # the sequence of helper invocations stays the same.
        concat_data(ytrain, col=-1)

    # pred of q values
    predictions, te_data_signal, pred_indexes = train_and_predict_ESN(
        model, train_data=xtrain_ct, test_data=ytest)

    # Saving prediction results (only when a reference cycle exists)
    if len(ytest) > 0:
        save_pred_results(output_file=output_file,
                          predictions=predictions,
                          te_data_signal=te_data_signal)

    return predictions, ytest
def combine_data(self):
    """Merge the CSV files in ``self.download_dir`` into one Excel workbook.

    The files returned by ``get_file_list`` (suffix ``.csv``) are merged with
    ``concat_data``. A warning is logged when any ``Stock`` value, rendered as
    text, contains a ``.``. The ``Stock`` column is then stripped of commas
    and converted to numbers (unparseable values become NaN), a
    ``Subcategory`` column is set to ``self.subcategory`` and the result is
    written, without the index, to ``<download_dir>/<subcategory>_all.xlsx``.
    """
    csv_paths = get_file_list(self.download_dir, suffix='.csv')
    workbook_path = os.path.join(self.download_dir,
                                 f'{self.subcategory}_all.xlsx')
    merged = concat_data(csv_paths)

    stock_text = merged['Stock'].astype(str)
    if stock_text.str.contains('.', regex=False).any():
        # A decimal point in "Stock" is treated as a sign of shifted columns
        self.logger.warning(
            'ALERT!\nColumn "Stock" contains decimal numbers.\nColumn misaligned.\nFix data mannually. '
        )

    # Drop thousands separators before the numeric conversion
    merged['Stock'] = pd.to_numeric(stock_text.str.replace(',', ''),
                                    errors='coerce')
    merged['Subcategory'] = self.subcategory
    merged.to_excel(workbook_path, index=False)
def main():
    """Command-line entry point: train one model and predict one cycle.

    Flow:
        1. Parse the command line (``--model_type``, ``--dataset``,
           ``--train_file``, ``--output_file``, ``--verbose``,
           ``--predict_cycle_num``, ``--grid_search``).
        2. Load ``./configurations_<dataset>.json`` and the training file
           (``np.loadtxt``), normalise column 1 to ``(0, 1)`` and locate the
           cycle minima with ``get_minimum``.
        3. Depending on the model type:
           * ``esn``: train the echo state network and save the predictions.
           * ``linear_ar``: train with the configured number of taps, or
             (``--grid_search 1``) search the number of taps, retrain with
             the optimum, plot, and dump the errors to
             ``./log/grid_search_results_<dataset>_cycle<k>.json``.
           * ``rnn`` / ``lstm`` / ``gru``: train with 22 taps and plot both
             normalised and unnormalised predictions, or
             (``--grid_search 1``) sweep ``n_hidden`` and write
             ``gs_results_<model_type>_cycle_<k>.json``.

    Exits through ``parser.error`` (status 2) when ``--train_file`` is
    missing, the model type is unknown, or ``--grid_search`` is not 0/1 for a
    model that honours it. ``--verbose`` is parsed but not used here.
    """
    import os  # local import: only needed to create the grid-search log directory

    parser = argparse.ArgumentParser(
        description=(
            "Use a variety of recurrent architectures for predicting solar sunpots as a time series\n"
            # The example lists only flags that this parser actually defines.
            "Example: python main_gs.py --model_type [esn/linear_ar/rnn/lstm/gru] --dataset dynamo "
            "--train_file [full path to training data file] "
            "--output_file [path to file containing predictions] "
            "--predict_cycle_num [index of the cycle to predict] "
            "--grid_search [0 or 1] --verbose [0 or 1]\n"
            "Description of different model types: \n"
            "esn: echo state network,\n"
            "linear_ar: linear autoregressive model, \n"
            "rnn: simple recurrent network (vanilla RNN / Elman unit), \n"
            "lstm: long-short term memory network, \n"
            "gru: gated recurrent units (simplification of lstm architecture)"
        ),
        formatter_class=RawTextHelpFormatter)
    parser.add_argument("--model_type", help="Enter the desired model", default="esn", type=str)
    parser.add_argument("--dataset", help="Type of dataset used - (dynamo/solar_data/sinus)", default="dynamo", type=str)
    parser.add_argument("--train_file", help="Location of training data file", default=None, type=str)
    parser.add_argument("--output_file", help="Location of the output file", default=None, type=str)
    parser.add_argument("--verbose", help="Verbosity (0 or 1)", default=0, type=int)
    parser.add_argument("--predict_cycle_num", help="Cycle index to be predicted", default=None, type=int)
    parser.add_argument("--grid_search", help="Option to perform grid search or not (1 - True, 0 - False)", default=0, type=int)

    # Parse the arguments
    args = parser.parse_args()
    model_type = args.model_type.lower()
    dataset = args.dataset
    train_file = args.train_file
    output_file = args.output_file
    use_grid_search = args.grid_search
    predict_cycle_num = args.predict_cycle_num

    # Fail early with a usage message instead of crashing inside np.loadtxt
    # or silently doing nothing for an unknown model / grid-search value.
    if model_type not in ("esn", "linear_ar", "rnn", "lstm", "gru"):
        parser.error("unknown --model_type '{}' (expected esn/linear_ar/rnn/lstm/gru)".format(model_type))
    if train_file is None:
        parser.error("--train_file is required")
    if model_type != "esn" and use_grid_search not in (0, 1):
        parser.error("--grid_search must be 0 or 1")

    # Load the configurations required for training
    # It is assumed that the configurations are present in this location
    config_file = "./configurations_{}.json".format(dataset)
    with open(config_file) as f:
        options = json.load(f)  # options is a dict keyed by model type

    # Load the training data
    data = np.loadtxt(train_file)
    # Keep a copy of the unnormalized data (used for the unnormalized plots)
    unnormalized_data = copy.deepcopy(data)
    data[:, 1], Xmax, Xmin = normalize(X=data[:, 1], feature_space=(0, 1))
    minimum_idx = get_minimum(data, dataset)

    if model_type == "esn":
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=1)
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        # The ESN horizon is tied to the length of the cycle to predict
        options["esn"]["tau"] = len(ytest) - 1
        options["esn"]["history_q"] = options["esn"]["tau"] + 1
        model = load_model_with_opts(options, model_type)

        # Concat data
        xtrain_ct = concat_data(xtrain, col=-1)

        # pred of q values
        predictions, te_data_signal, _ = train_and_predict_ESN(
            model, train_data=xtrain_ct, test_data=ytest)

        # Saving prediction results
        save_pred_results(output_file=output_file,
                          predictions=predictions,
                          te_data_signal=te_data_signal)

    elif model_type == "linear_ar":

        if use_grid_search == 0:
            # Load the model with corresponding options
            num_taps = options[model_type]["num_taps"]
            model = load_model_with_opts(options, model_type)
            X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=num_taps)
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            # pred of q values
            predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(
                model, xtrain, ytrain, ytest, tr_to_val_split=0.9, tr_verbose=True)

            plot_predictions(
                predictions=predictions_ar,
                ytest=ytest,
                title="AR model predictions with {} taps for cycle index {}".format(
                    num_taps, predict_cycle_num))

            # Save prediction results in a txt file
            save_pred_results(output_file=output_file,
                              predictions=predictions_ar,
                              te_data_signal=ytest[:, -1])

        elif use_grid_search == 1:
            Error_dict = {}
            test_predictions = []
            test_error_optimal = []
            predict_cycle_num_array = [predict_cycle_num]
            params = {"num_taps": list(np.arange(10, 50, 2))}  # For Dynamo
            #params = {"num_taps":list(np.arange(5, 50, 2))} # For Solar

            #TODO: Fix array nature of optimal_num_taps_all
            optimal_num_taps_all, training_errors_all, val_errors_all, test_errors_all = grid_search_AR_all_cycles(
                data=data,
                solar_indices=minimum_idx,
                model_type=model_type,
                options=options,
                params=params,
                predict_cycle_num_array=predict_cycle_num_array)

            # One row per candidate: (num_taps, validation error of every cycle)
            Error_dict["validation_errors_with_taps"] = [
                (float(params["num_taps"][i]), *val_errors_all[:, i])
                for i in range(val_errors_all.shape[1])
            ]

            plt.figure()
            plt.plot(params["num_taps"], val_errors_all[0], label="Validation MSE")
            plt.plot(params["num_taps"], training_errors_all[0], label="Training MSE")
            plt.ylabel("MSE")
            plt.xlabel("Number of taps")
            plt.legend()
            plt.title("Error (MSE) vs number of taps")
            plt.show()

            if not isinstance(optimal_num_taps_all, list):
                optimal_num_taps_all = [optimal_num_taps_all]

            # NOTE: Object of int64 is not json serializable, hence float().
            # Converting element-wise also works for more than one cycle.
            Error_dict["optimal_num_taps"] = [float(taps) for taps in optimal_num_taps_all]

            # Retrain the model again with the optimal value
            for optimal_num_taps, cycle_num in zip(optimal_num_taps_all, predict_cycle_num_array):
                options[model_type]["num_taps"] = optimal_num_taps
                model = load_model_with_opts(options, model_type)
                X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=optimal_num_taps)
                xtrain, ytrain, ytest = get_cycle(X, Y, icycle=cycle_num)

                # pred of q values
                predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(
                    model, xtrain, ytrain, ytest, tr_to_val_split=0.75, tr_verbose=True)
                test_predictions.append(predictions_ar.tolist())

                if len(ytest) > 0:
                    plot_predictions(
                        predictions=predictions_ar,
                        ytest=ytest,
                        title="AR model predictions with {} taps for cycle index {}".format(
                            optimal_num_taps, cycle_num))
                    test_error_optimal.append(test_error)
                else:
                    # No reference cycle: draw the forecast after the known series.
                    resolution = np.around(np.diff(data[:, 0]).mean(), 1)
                    last_train_time = ytrain[-1][-1][0]
                    # One x value per prediction. Built by multiplication because a
                    # float-step np.arange stopping at start + n*resolution yields
                    # n-1 points (or an unpredictable count), which does not match
                    # the n predictions passed to plt.plot.
                    future_times = last_train_time + resolution * np.arange(1, len(predictions_ar) + 1)

                    plt.figure()
                    plt.plot(data[:minimum_idx[-1], 0], data[:minimum_idx[-1], 1], 'r+-')
                    plt.plot(future_times, predictions_ar, 'b*-')
                    plt.legend(['Original timeseries', 'Future prediction'])
                    plt.title('Plot of original timeseries and future predictions')
                    plt.show()

            Error_dict["Test_predictions"] = test_predictions
            Error_dict["Test_error"] = [test_error_optimal] if test_error_optimal else []

            # The log directory is not guaranteed to exist on a fresh checkout
            os.makedirs('./log', exist_ok=True)
            log_path = './log/grid_search_results_{}_cycle{}.json'.format(
                dataset, predict_cycle_num_array[-1])
            with open(log_path, 'w') as fp:
                json.dump(Error_dict, fp, indent=2)

            #TODO: To fix saving result files properly
            # (only the predictions of the last retrained cycle are saved)
            save_pred_results(output_file=output_file,
                              predictions=predictions_ar,
                              te_data_signal=ytest[:, -1])

    elif model_type in ["rnn", "lstm", "gru"]:

        #NOTE: Obtain the data and targets by heuristically setting p
        num_taps_rnn = 22

        # In case parameter tuning is not carried out
        if use_grid_search == 0:
            # Load the model with the corresponding options
            model = load_model_with_opts(options, model_type)

            X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=num_taps_rnn)
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            # Pred of q values
            predictions_rnn, test_error, val_error, tr_error = train_and_predict_RNN(
                model, xtrain, ytrain, ytest, tr_to_val_split=0.90, tr_verbose=True)

            if len(ytest) > 0:
                # Normalized predictions in [0, 1]
                plot_predictions(
                    predictions=predictions_rnn,
                    ytest=ytest,
                    title="{} model predictions with {} taps for cycle index {}".format(
                        model_type, num_taps_rnn, predict_cycle_num))

                # Unnormalized predictions in original scale
                ytest_un = np.copy(ytest)
                ytest_un[:, -1] = unnormalize(ytest[:, -1], Xmax, Xmin)
                plot_predictions(
                    predictions=unnormalize(predictions_rnn, Xmax, Xmin),
                    ytest=ytest_un,
                    title="{} model predictions (unnormalized) with {} taps for cycle index {}".format(
                        model_type, num_taps_rnn, predict_cycle_num))

                # Save prediction results in a txt file
                save_pred_results(output_file=output_file,
                                  predictions=predictions_rnn,
                                  te_data_signal=ytest[:, -1])
            else:
                plot_future_predictions(
                    data=data,
                    minimum_idx=minimum_idx,
                    ytrain=ytrain,
                    predictions=predictions_rnn,
                    title="Plot of original timeseries and future predictions for {} for cycle index {}".format(
                        model_type, predict_cycle_num))

                plot_future_predictions(
                    data=unnormalized_data,
                    minimum_idx=minimum_idx,
                    ytrain=ytrain,
                    predictions=unnormalize(predictions_rnn, Xmax, Xmin),
                    title="Plot of original unnormalized timeseries and future predictions for {} for cycle index {}".format(
                        model_type, predict_cycle_num))

                # Save prediction results in a txt file
                save_pred_results(output_file=output_file,
                                  predictions=predictions_rnn,
                                  te_data_signal=ytest)

        elif use_grid_search == 1:
            gs_params = {"n_hidden": [30, 40, 50]}
            gs_list_of_options = create_list_of_dicts(options=options,
                                                      model_type=model_type,
                                                      param_dict=gs_params)

            print("Grid Search to be carried over following {} configs:\n".format(len(gs_list_of_options)))

            # The dataset does not depend on the swept hyper-parameter, so it is
            # built once instead of once per configuration.
            X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=num_taps_rnn)
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            val_errors_list = []
            for config_no, gs_option in enumerate(gs_list_of_options, start=1):
                print("Config:{} is \n{}".format(config_no, gs_option))

                # Load the model with the corresponding options
                model = RNN_model(
                    input_size=gs_option["input_size"],
                    output_size=gs_option["output_size"],
                    n_hidden=gs_option["n_hidden"],
                    n_layers=gs_option["n_layers"],
                    num_directions=gs_option["num_directions"],
                    model_type=gs_option["model_type"],
                    batch_first=gs_option["batch_first"],
                    lr=gs_option["lr"],
                    device=gs_option["device"],
                    num_epochs=gs_option["num_epochs"],
                )

                # Pred of q values
                predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(
                    model, xtrain, ytrain, ytest, tr_to_val_split=0.90, tr_verbose=True)

                # Each configuration is logged together with the errors it reached
                gs_option["Validation_Error"] = val_error
                gs_option["Training_Error"] = tr_error
                val_errors_list.append(gs_option)

            with open('gs_results_{}_cycle_{}.json'.format(model_type, predict_cycle_num), 'w') as f:
                f.write(json.dumps(val_errors_list, indent=2))
def train_and_predict_RNN(model, train_data_inputs, train_data_targets, test_data, tr_to_val_split=0.9, tr_verbose=False):
    """Train a recurrent model and forecast past the end of the training data.

    Args:
        model: recurrent model; ``model.input_size``, ``model.num_epochs`` and
            ``model.model_type`` are read here.
        train_data_inputs: training inputs, merged with ``concat_data``. A
            2-D result ``(N, P)`` is reshaped to ``(N, P, model.input_size)``;
            a result that is already 3-D is used as is.
        train_data_targets: training targets, merged with ``concat_data``.
        test_data: reference cycle. When non-empty, its length sets the
            forecast horizon and its last column is used for the test error;
            when empty, 132 steps are forecast and the test error is NaN.
        tr_to_val_split: forwarded to ``train_validation_split`` as
            ``tr_split``.
        tr_verbose: forwarded to ``train_rnn``; when truthy the loss curves
            are also plotted.

    Returns:
        ``(predictions_rnn, test_error, val_error, tr_error)`` where the
        training/validation errors are the last entries of the loss histories.
    """
    # Apply concat data to concatenate the rows that have columns with signal (not the timestamp)
    train_data_inputs = concat_data(train_data_inputs)
    train_data_targets = concat_data(train_data_targets)

    if len(train_data_inputs.shape) == 2:
        # Extra dimension to be added
        n_samples, n_taps = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((n_samples, n_taps, model.input_size))

    # Read the window length from the array itself: previously it was bound
    # only inside the 2-D branch above, so inputs that arrived already 3-D
    # raised NameError when the forecast seed was built.
    n_taps = train_data_inputs.shape[1]

    # Train - Validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
        train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model = train_rnn(model=model,
                                             nepochs=model.num_epochs,
                                             tr_inputs=tr_inputs,
                                             tr_targets=tr_targets,
                                             val_inputs=val_inputs,
                                             val_targets=val_targets,
                                             tr_verbose=tr_verbose)

    if tr_verbose:
        plot_losses(tr_losses=tr_losses, val_losses=val_losses, logscale=True)

    has_reference = len(test_data) > 0
    # NOTE: Heuristically setting the number of future predictions when there
    # is no reference cycle
    n_predict = len(test_data) if has_reference else 132

    # The forecast is seeded with the last training window, as a batch of one
    eval_input = train_data_inputs[-1, :, :].reshape((1, n_taps, -1))
    predictions_rnn = predict_rnn(model=model, eval_input=eval_input, n_predict=n_predict)

    if has_reference:
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_rnn)
    else:
        test_error = np.nan  # No reference to compare for generating Test error

    tr_error = tr_losses[-1]  # latest training error
    val_error = val_losses[-1]  # latest validation error

    print("{} - {}, {} - {}, {} - {}, {}, - {}".format(
        "Model", model.model_type,
        "Training Error", tr_error,
        "Validation Error", val_error,
        "Test Error", test_error))
    print("***********************************************************************************************************")

    return predictions_rnn, test_error, val_error, tr_error
def train_and_predict_RNN(model, options, train_data_inputs, train_data_targets, test_data, tr_to_val_split=0.9, tr_verbose=False, use_grid_search=0):
    """Train a recurrent model and forecast with the best-validation weights.

    After training, a fresh ``RNN_model`` is built from ``options``, loaded
    with the best weights returned by ``train_rnn`` and used for the forecast.

    Args:
        model: recurrent model to train; ``model.input_size`` and
            ``model.num_epochs`` are read here.
        options: dict with the keys used to rebuild the model
            (``input_size``, ``output_size``, ``n_hidden``, ``n_layers``,
            ``num_directions``, ``model_type``, ``batch_first``, ``lr``,
            ``num_epochs``).
        train_data_inputs: training inputs, merged with ``concat_data``. A
            2-D result ``(N, P)`` is reshaped to ``(N, P, model.input_size)``;
            a result that is already 3-D is used as is.
        train_data_targets: training targets, merged with ``concat_data``.
        test_data: reference cycle. When non-empty, its length sets the
            forecast horizon and its last column is used for the test error;
            when empty, 132 steps are forecast and the test error is NaN.
        tr_to_val_split: forwarded to ``train_validation_split`` as
            ``tr_split``.
        tr_verbose: forwarded to ``train_rnn``.
        use_grid_search: selects the summary line; 0 includes the test error,
            1 omits it, any other value prints no summary.

    Returns:
        ``(predictions_rnn, test_error, best_val_loss, tr_error)``.
        ``best_val_loss`` is converted with ``.cpu().numpy()``; ``tr_error``
        is the last entry of the training loss history.
    """
    # Count number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params:{}".format(total_num_params, total_num_trainable_params))

    # Apply concat data to concatenate the rows that have columns with signal (not the timestamp)
    train_data_inputs = concat_data(train_data_inputs)
    train_data_targets = concat_data(train_data_targets)

    if len(train_data_inputs.shape) == 2:
        # Extra dimension to be added
        n_samples, n_taps = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((n_samples, n_taps, model.input_size))

    # Read the window length from the array itself: previously it was bound
    # only inside the 2-D branch above, so inputs that arrived already 3-D
    # raised NameError when the forecast seed was built.
    n_taps = train_data_inputs.shape[1]

    # Train - Validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
        train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model, best_model_wts, best_val_loss, best_val_epoch = train_rnn(
        model=model,
        nepochs=model.num_epochs,
        tr_inputs=tr_inputs,
        tr_targets=tr_targets,
        val_inputs=val_inputs,
        val_targets=val_targets,
        tr_verbose=tr_verbose)

    print("Model saved at epoch:{} with val loss:{}".format(best_val_epoch, best_val_loss))

    # Rebuild an identically configured model and load the best weights into it
    device = get_device()
    model_best = RNN_model(
        input_size=options["input_size"],
        output_size=options["output_size"],
        n_hidden=options["n_hidden"],
        n_layers=options["n_layers"],
        num_directions=options["num_directions"],
        model_type=options["model_type"],
        batch_first=options["batch_first"],
        lr=options["lr"],
        num_epochs=options["num_epochs"],
    ).to(device)
    model_best.load_state_dict(best_model_wts)

    has_reference = len(test_data) > 0
    # NOTE: Heuristically setting the number of future predictions when there
    # is no reference cycle
    n_predict = len(test_data) if has_reference else 132

    # The forecast is seeded with the last training window, as a batch of one
    eval_input = torch.from_numpy(train_data_inputs[-1, :, :].reshape((1, n_taps, -1)))
    predictions_rnn = predict_rnn(model=model_best, eval_input=eval_input, n_predict=n_predict)

    if has_reference:
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_rnn)
    else:
        test_error = np.nan  # No reference to compare for generating Test error

    tr_error = tr_losses[-1]  # latest training error
    val_error = val_losses[-1]  # latest validation error

    if use_grid_search == 0:
        print("{} - {}, {} - {}, {} - {}, {} - {}, {} - {}".format(
            "Model", model_best.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Best Validation Error", best_val_loss,
            "Test Error", test_error))
        print("***********************************************************************************************************")
    elif use_grid_search == 1:
        print("{} - {}, {} - {}, {} - {}, {} - {}".format(
            "Model", model_best.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Best Validation Error", best_val_loss))
        print("***********************************************************************************************************")

    best_val_loss = best_val_loss.cpu().numpy()
    return predictions_rnn, test_error, best_val_loss, tr_error
def combine_subcategory_data(self):
    """Merge the per-subcategory workbooks into ``<download_dir>/combine.xlsx``.

    Every file in ``self.download_dir`` selected by ``get_file_list`` with
    the suffix ``all.xlsx`` is merged with ``concat_data`` and the result is
    written with ``to_excel`` using its default options.
    """
    subcategory_workbooks = get_file_list(self.download_dir, suffix='all.xlsx')
    merged = concat_data(subcategory_workbooks)
    merged.to_excel(os.path.join(self.download_dir, 'combine.xlsx'))
def main():
    """Experimental command-line driver for the tap-count search (work in progress).

    What the visible code does:
        * parses ``--model_type``, ``--dataset``, ``--train_file``,
          ``--output_file``, ``--verbose`` and ``--predict_cycle_num``;
        * loads ``./configurations.json`` and forces
          ``options[model_type]["num_taps"]`` to 10;
        * loads the training file and rescales column 1 to ``[-1, 1]``;
        * for each tap count in ``P = [10, 20, 30]`` trains on all but the
          last ``n_tests`` cycles and stores a validation MSE per cycle;
        * retrains on the last ``n_tests`` cycles, plots, and calls
          ``sys.exit(0)``.

    NOTE(review): the nesting below was reconstructed from a flattened copy
    of the file and should be confirmed against the original. Several
    statements look unfinished; they are flagged inline rather than changed.
    """
    # NOTE(review): the example in the description mentions --test_file and
    # --verbosity, but the parser below defines neither (it defines --verbose).
    parser = argparse.ArgumentParser(description=
    "Use a variety of recurrent architectures for predicting solar sunpots as a time series\n"\
    "Example: python main.py --model_type [esn/linear_ar/rnn/lstm/gru] --dataset dynamo --train_file [full path to training data file] \
    --output_file [path to file containing predictions] --test_file [path to test file (if any)] \
    --verbosity [1 or 2] \n"
    "Description of different model types: \n"\
    "esn: echo state network,\n" \
    "linear_ar: linear autoregressive model, \n"\
    "rnn: simple recurrent network (vanilla RNN / Elman unit), \n" \
    "lstm: long-short term memory network, \n" \
    "gru: gated recurrent units (simplification of lstm architecture)",
    formatter_class=RawTextHelpFormatter)
    parser.add_argument("--model_type", help="Enter the desired model", default="esn", type=str)
    parser.add_argument("--dataset", help="Type of dataset used - (dynamo/solar_data/sinus)", default="dynamo", type=str)
    parser.add_argument("--train_file", help="Location of training data file", default=None, type=str)
    parser.add_argument("--output_file", help="Location of the output file", default=None, type=str)
    parser.add_argument("--verbose", help="Verbosity (0 or 1)", default=0, type=int)
    #parser.add_argument("--test_file", help="(Optional) Location of the test data file", default=None, type=str)
    parser.add_argument("--predict_cycle_num", help="Cycle number to be predicted", default=None, type=int)

    # Parse the arguments
    args = parser.parse_args()
    model_type = args.model_type.lower()
    dataset = args.dataset
    train_file = args.train_file
    output_file = args.output_file
    verbose = args.verbose  # parsed but not read again in this function
    # test_file = args.test_file
    predict_cycle_num = args.predict_cycle_num  # parsed but not read again in this function

    # Load the configurations required for training
    config_file = "./configurations.json"  # It is assumed that the configurations are
                                           # present in this location
    with open(config_file) as f:
        options = json.load(f)  # This loads options as a dict with keys that can be accessed

    # The configured number of taps is overridden with a fixed value of 10
    options[model_type]["num_taps"] = 10
    p = options[model_type]["num_taps"]

    # Load the training data
    data = np.loadtxt(train_file)
    # Min-max rescale of column 1 to the range [-1, 1]
    data[:, 1] = 2*((data[:, 1] - data[:,1].min())/(data[:,1].max() - data[:,1].min())) - 1
    minimum_idx = get_minimum(data, dataset)
    # plt.figure()

    # Get multiple step ahead prediction datasets
    X, Y = get_msah_training_dataset(data, minimum_idx, tau=1, p=p)  # options[model_type]["num_taps"]
    n_cycles = len(Y)
    n_tests = 3  # number of trailing cycles held out for testing
    # xtrain, ytrain, ytest = get_cycle(X, Y, n_cycles+1)

    P = [10, 20, 30]  # candidate tap counts
    # One row per cycle, one column per candidate; rows of the held-out
    # cycles are never written and stay zero.
    val_err = np.zeros((n_cycles, len(P)))

    # errors = new_train_ar(data,minimum_idx)
    # errors = {"validatation errors": (n_val_cycles, n_tried_numtapsvalues),
    #           "test_errors":(n_test_cycles,),
    #           "test_predictions: list of n_test_cycles arrays [ (length of 1st test cycle, 2), .. ]
    #           "future_points": (120,)
    #           }

    for ip, p in enumerate(P):
        X, Y = get_msah_training_dataset(data, minimum_idx, tau=1, p=p)
        for icycle in range(n_cycles-n_tests):
            xtrain, ytrain, yval = get_cycle(X, Y, icycle)
            if model_type == "linear_ar":
                model = Linear_AR(
                    num_taps=p,
                    lossfn_type=options[model_type]["lossfn_type"],
                    lr=options[model_type]["lr"],
                    num_epochs=options[model_type]["num_epochs"],
                    init_net=options[model_type]["init_net"],
                    device=options[model_type]["device"]
                )
                # NOTE(review): the result is passed to mean_squared_error as
                # if it were the predictions alone, while a later call in this
                # function unpacks two values from train_and_predict_AR —
                # confirm its return shape.
                predictions = train_and_predict_AR(model, concat_data(xtrain), concat_data(ytrain), yval[:, 1])

            elif model_type == "rnn":
                # Usage:
                # python main.py --model_type rnn --dataset dynamo --train_file data/dynamo_esn.txt --output_file tmp.txt --predict_cycle_num 10
                X, Y = get_msah_training_dataset(data, minimum_idx, tau=1, p=np.inf)
                predictions = train_and_predict_RNN(X, Y, enplot=False, n_future=120, dataset=dataset)
                # NOTE(review): assumed to belong to the rnn branch, which then
                # terminates the program after the first cycle — confirm.
                sys.exit(0)

            val_err[icycle, ip] = mean_squared_error(yval[:, 1], predictions)

    # NOTE(review): np.argmin returns the column index into P, not a tap
    # count from P, yet the value is passed as p below — confirm the intent.
    optimal_p = np.argmin(val_err.mean(0)).reshape(-1)[0]

    X, Y = get_msah_training_dataset(data, minimum_idx, tau=1, p=optimal_p)
    test_err_ar = np.zeros(n_tests)
    for i_test_cycle in range(n_cycles-n_tests, n_cycles):
        xtrain, ytrain, ytest = get_cycle(X, Y, i_test_cycle)

        model = load_model_with_opts(options, model_type)
        # NOTE(review): yval here is left over from the validation loop above;
        # ytest of the current cycle was presumably intended.
        predictions = train_and_predict_AR(model, concat_data(xtrain), concat_data(ytrain), yval[:, 1])
        # NOTE(review): test_err_ar has n_tests entries but is indexed with the
        # absolute cycle number, which is out of range once
        # n_cycles - n_tests >= n_tests.
        test_err_ar[i_test_cycle] = mean_squared_error(ytest[:, 1], predictions)

        # model = load_model_with_opts(options, model_type)
        # NOTE(review): RNN and err are not assigned anywhere in this function,
        # and icycle is the stale index of the validation loop — presumably
        # these lines fail unless the names exist at module level.
        model = RNN(input_size=p, hidden_size=10)
        predictions = train_and_predict_RNN(model, concat_data(xtrain), concat_data(ytrain), ytest[:, 1])
        err[icycle] = mean_squared_error(ytest[:, 1], predictions)

        # NOTE(review): assumed to be inside the loop (one plot per test
        # cycle); at this point predictions holds the RNN output although the
        # title says Linear AR.
        plot_predictions(
            ytest=ytest,
            predictions=predictions,
            title="Predictions using Linear AR model"
        )

    plt.figure(); plt.plot(list(range(n_cycles)), err)
    plt.show()
    # NOTE(review): if this exit is at function level, as reconstructed here,
    # everything below it is unreachable.
    sys.exit(0)

    # NOTE(review): te_data_signal and tr_data_signal are not assigned
    # anywhere in this function.
    if model_type == "esn":
        options["esn"]["tau"] = len(te_data_signal) - 1
        options["esn"]["history_q"] = options["esn"]["tau"] + 1
        print("Shape of training data:{}".format(tr_data_signal.shape))
        print("Shape of testing data:{}".format(te_data_signal.shape))
        # Load the model with corresponding options
        model = load_model_with_opts(options, model_type)

        # pred of q values
        predictions, pred_indexes = train_and_predict_ESN(model, tr_data_signal, te_data_signal)

    elif model_type == "linear_ar":
        # Load the model with corresponding options
        model = load_model_with_opts(options, model_type)

        # pred of q values
        predictions, pred_indexes = train_and_predict_AR(model, xtrain, ytrain, ytest)

    elif model_type == "rnn":
        model = load_model_with_opts(options, model_type)

    # Append one tab-separated line with the linear_ar options and the errors.
    # NOTE(review): the options are always read from options["linear_ar"], and
    # the test/train/val entries are all computed from the same expression.
    with open("results__{}.txt".format(model_type), "a") as fp:
        print("\t".join(
            ["{}:{}".format(k, v) for k, v in options["linear_ar"].items()]
            + ["{}:{}".format("test__mse", ((predictions-te_data_signal)**2).mean())]
            + ["{}:{}".format("train__mse", ((predictions - te_data_signal) ** 2).mean())]
            + ["{}:{}".format("val__mse", ((predictions - te_data_signal) ** 2).mean())]
        ), file=fp)

    # Save the results in the output file (column 0: predictions, column 1: reference)
    np.savetxt(fname=output_file,
               X=np.concatenate([predictions.reshape(-1, 1), te_data_signal.reshape(-1, 1)], axis=1)
               )