def train_model_ESN(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None):
    """Train an ESN on the cycles before one cycle and predict that cycle.

    The dataset is built with ``get_msah_training_dataset`` (always with
    ``tau=1`` and ``p=1``), split with ``get_cycle``, and the resulting
    training cycles are concatenated before being handed to
    ``train_and_predict_ESN``.

    Args:
        options: Nested options dict. ``options["esn"]["tau"]`` and
            ``options["esn"]["history_q"]`` are overwritten in place.
        model_type: Key forwarded to ``load_model_with_opts``.
            NOTE(review): the horizon is always written under the literal
            ``"esn"`` key, not under ``model_type`` -- confirm they match.
        data: Time series forwarded to ``get_msah_training_dataset``.
        minimum_idx: Forwarded to ``get_msah_training_dataset`` as
            ``minimum_idx`` (presumably the cycle boundaries -- confirm).
        predict_cycle_num: Cycle selector forwarded to ``get_cycle`` as
            ``icycle``.
        tau: Unused; kept for signature parity with the other trainers.
        output_file: Forwarded to ``save_pred_results`` when test data exists.

    Returns:
        Tuple ``(predictions, ytest)``.
    """
    # Cycles whose prediction horizon is pinned instead of being derived
    # from the test data.
    # NOTE(review): presumably these are the cycles with no recorded test
    # data (pure forecasts) -- confirm against the datasets in use.
    fixed_horizon_cycles = (23, 76)
    fixed_horizon_tau = 132

    # Both code paths of the original built the dataset identically, so the
    # shared steps are done once here.
    X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=1, p=1)
    xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

    if predict_cycle_num in fixed_horizon_cycles:
        esn_tau = fixed_horizon_tau
    else:
        # NOTE(review): evaluates to -1 when ytest is empty for a cycle that
        # is not listed in fixed_horizon_cycles.
        esn_tau = len(ytest) - 1

    options["esn"]["tau"] = esn_tau
    options["esn"]["history_q"] = esn_tau + 1
    model = load_model_with_opts(options, model_type)

    # Only the concatenated inputs are consumed by the ESN trainer; the
    # concatenated targets were computed but never used, so they are dropped.
    xtrain_ct = concat_data(xtrain, col=-1)

    predictions, te_data_signal, _ = train_and_predict_ESN(
        model, train_data=xtrain_ct, test_data=ytest)

    # Results are only persisted when there is test data to compare against.
    if len(ytest) > 0:
        save_pred_results(output_file=output_file, predictions=predictions,
                          te_data_signal=te_data_signal)

    return predictions, ytest
def grid_search_AR_single_cycle(data, solar_indices, model_type, options, params, predict_cycle_num):
    """Evaluate an AR model for every candidate ``num_taps`` on one cycle.

    For each option set produced by ``create_list_of_dicts`` the dataset is
    rebuilt with that number of taps, a fresh model is loaded and
    ``train_and_predict_AR`` is run.

    Args:
        data: Time series forwarded to ``get_msah_training_dataset``.
        solar_indices: Forwarded to ``get_msah_training_dataset`` as
            ``minimum_idx``.
        model_type: Key into ``options`` and selector for
            ``load_model_with_opts``.
        options: Nested options dict. ``options[model_type]["num_taps"]`` is
            overwritten in place on every iteration and is left at the last
            candidate when the function returns.
        params: Grid definition; must contain the key ``"num_taps"``.
        predict_cycle_num: Cycle selector forwarded to ``get_cycle``.

    Returns:
        Tuple ``(training_errors, val_errors, test_errors)``; each is an
        array of shape ``(1, len(params["num_taps"]))``.

    Raises:
        AssertionError: If a candidate ``num_taps`` is not positive.
    """
    candidate_options = create_list_of_dicts(options=options,
                                             model_type=model_type,
                                             param_dict=params)

    # NOTE(review): the result arrays are sized by the "num_taps" grid only;
    # this assumes create_list_of_dicts yields one entry per tap value.
    num_candidates = len(params["num_taps"])
    val_errors = np.zeros((1, num_candidates))
    training_errors = np.zeros((1, num_candidates))
    test_errors = np.zeros((1, num_candidates))

    for i, param_options in enumerate(candidate_options):
        p = param_options["num_taps"]
        # The original passed print(...) as the assert message, which prints
        # and then raises AssertionError(None); use the text itself instead.
        assert p > 0, "Invalid order specified as parameter"
        print("Parameter set used:\n{}".format(param_options))

        X, Y = get_msah_training_dataset(data, minimum_idx=solar_indices,
                                         tau=options[model_type]["output_size"],
                                         p=p)
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        # The model is built from `options`, so the candidate has to be
        # written back before loading it.
        options[model_type]["num_taps"] = p
        model = load_model_with_opts(options, model_type)

        _, test_error, val_error, tr_error = train_and_predict_AR(
            model, xtrain, ytrain, ytest)

        val_errors[:, i] = val_error
        training_errors[:, i] = tr_error
        test_errors[:, i] = test_error

    return training_errors, val_errors, test_errors
def train_model_AR(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None, use_grid_search=0):
    """Train an AR model and predict one cycle, optionally tuning ``num_taps``.

    With ``use_grid_search == 0`` the model is trained once with the number
    of taps found in ``options``. With ``use_grid_search == 1`` a grid search
    over ``num_taps`` is run through ``grid_search_AR_all_cycles``, the
    validation curve is plotted, the model is retrained with the selected
    value and the results are written to a JSON file under
    ``./param_selection/``. During the grid search ``sys.stdout`` is
    redirected to a log file in the same directory.

    Args:
        options: Nested options dict; ``options[model_type]["num_taps"]`` is
            overwritten in place in grid-search mode.
        model_type: Key into ``options`` and selector for
            ``load_model_with_opts``.
        data: Time series forwarded to ``get_msah_training_dataset``.
        minimum_idx: Forwarded to ``get_msah_training_dataset`` as
            ``minimum_idx``.
        predict_cycle_num: Cycle selector forwarded to ``get_cycle``.
        tau: Unused; the target length is read from
            ``options[model_type]["output_size"]``.
        output_file: Forwarded to ``save_pred_results`` when test data exists.
        use_grid_search: ``0`` for a single training run, ``1`` for grid
            search followed by retraining.

    Returns:
        The predictions returned by ``train_and_predict_AR`` for the (last)
        trained model.

    Raises:
        ValueError: If ``use_grid_search`` is neither 0 nor 1. Previously
            this ended in an ``UnboundLocalError`` at the return statement.
    """
    if use_grid_search not in (0, 1):
        raise ValueError(
            "use_grid_search must be 0 or 1, got {!r}".format(use_grid_search))

    if use_grid_search == 0:
        model = load_model_with_opts(options, model_type)
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx,
                                         tau=options[model_type]["output_size"],
                                         p=options[model_type]["num_taps"])
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        predictions_ar, _, _, _ = train_and_predict_AR(
            model, xtrain, ytrain, ytest, tr_to_val_split=0.9, tr_verbose=True)

        # Plot and save only when there is test data to compare against.
        if len(ytest) > 0:
            plot_predictions(predictions=predictions_ar, ytest=ytest,
                             title="AR model predictions with {} taps for cycle index {}".format(
                                 options[model_type]["num_taps"], predict_cycle_num))
            save_pred_results(output_file=output_file, predictions=predictions_ar,
                              te_data_signal=ytest[:, -1])

        return predictions_ar

    # ---- Grid search over the number of taps --------------------------
    logfile = './param_selection/{}_gs_cycle_{}_1_logs.txt'.format(model_type, predict_cycle_num)

    error_dict = {}
    test_predictions = []
    test_error_optimal = []
    predict_cycle_num_array = [predict_cycle_num]
    params = {"num_taps": list(np.arange(10, 40, 1))}  # For Dynamo
    #params = {"num_taps":list(np.arange(5, 50, 2))} # For Solar

    # Everything printed during the search goes to the log file. The
    # original never restored sys.stdout nor closed the log, so all later
    # output of the process ended up in the log; restore it unconditionally.
    orig_stdout = sys.stdout
    with open(logfile, 'w') as log_fp:
        sys.stdout = log_fp
        try:
            (optimal_num_taps_all, training_errors_all,
             val_errors_all, test_errors_all) = grid_search_AR_all_cycles(
                data=data, solar_indices=minimum_idx, model_type=model_type,
                options=options, params=params,
                predict_cycle_num_array=predict_cycle_num_array)

            # One tuple per tap value: (num_taps, validation errors...).
            error_dict["validation_errors_with_taps"] = [
                (float(params["num_taps"][i]), *val_errors_all[:, i])
                for i in range(val_errors_all.shape[1])
            ]

            plt.figure()
            plt.plot(params["num_taps"], val_errors_all[0], label="Validation MSE")
            plt.ylabel("MSE")
            plt.xlabel("Number of taps")
            plt.legend()
            plt.title("Error (MSE) vs number of taps")
            plt.show()

            if not isinstance(optimal_num_taps_all, list):
                optimal_num_taps_all = [optimal_num_taps_all]

            # NOTE: Object of int64 is not json serializable, hence float().
            # Converting element-wise gives the same result as the original
            # for a single value and no longer breaks on several values.
            error_dict["optimal_num_taps"] = [float(taps) for taps in optimal_num_taps_all]

            # Retrain the model again with the optimal value
            for i, optimal_num_taps in enumerate(optimal_num_taps_all):
                options[model_type]["num_taps"] = optimal_num_taps
                model = load_model_with_opts(options, model_type)
                X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx,
                                                 tau=options[model_type]["output_size"],
                                                 p=optimal_num_taps)
                xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num_array[i])

                predictions_ar, test_error, _, _ = train_and_predict_AR(
                    model, xtrain, ytrain, ytest, tr_to_val_split=0.90, tr_verbose=True)
                test_predictions.append(predictions_ar.tolist())

                if len(ytest) > 0:
                    plot_predictions(predictions=predictions_ar, ytest=ytest,
                                     title="AR model predictions with {} taps for cycle index {}".format(
                                         optimal_num_taps, predict_cycle_num_array[i]))
                    test_error_optimal.append(test_error)
                else:
                    # No test data for this cycle: show the forecast appended
                    # to the known series instead.
                    plot_future_predictions(data=data, minimum_idx=minimum_idx,
                                            ytrain=ytrain, predictions=predictions_ar,
                                            title='Plot of original timeseries and future predictions for AR model')

            error_dict["Test_predictions"] = test_predictions
            if len(test_error_optimal) > 0:
                error_dict["Test_error"] = [test_error_optimal]
            else:
                error_dict["Test_error"] = []

            # predict_cycle_num is the only entry of predict_cycle_num_array,
            # so it names the same file without relying on the loop index.
            with open('./param_selection/gsresults_{}_cycle{}_1.json'.format(
                    model_type, predict_cycle_num), 'w+') as fp:
                json.dump(error_dict, fp, indent=2)

            # Saving result files properly
            if len(ytest) > 0:
                save_pred_results(output_file=output_file, predictions=predictions_ar,
                                  te_data_signal=ytest[:, -1])
        finally:
            sys.stdout = orig_stdout

    return predictions_ar
def train_model_RNN(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None, use_grid_search=0, Xmax=None, Xmin=None):
    """Train an RNN and predict one cycle, or grid-search its hyper-parameters.

    With ``use_grid_search == 0`` a single model is loaded from ``options``,
    trained, and its predictions are saved through ``save_pred_results``.
    With ``use_grid_search == 1`` every configuration produced by
    ``create_list_of_dicts`` is trained and its training/validation errors
    are written to a JSON file under ``./param_selection/``; ``sys.stdout``
    is redirected to a log file there while the search runs.

    NOTE(review): a second ``train_model_RNN`` is defined later in this
    source; if both live in the same module the later one replaces this one.

    Args:
        options: Nested options dict; ``options[model_type]`` is forwarded to
            ``train_and_predict_RNN`` in single-run mode.
        model_type: Key into ``options`` and selector for
            ``load_model_with_opts``.
        data: Time series forwarded to ``get_msah_training_dataset``.
        minimum_idx: Forwarded to ``get_msah_training_dataset`` as
            ``minimum_idx``.
        predict_cycle_num: Cycle selector forwarded to ``get_cycle``.
        tau: Target length used for the dataset in single-run mode. In
            grid-search mode ``model.output_size`` is used instead.
        output_file: Forwarded to ``save_pred_results`` (single-run mode).
        use_grid_search: ``0`` for a single run, ``1`` for grid search.
        Xmax: Unused in the active code (only referenced by commented-out
            un-normalisation plots).
        Xmin: Unused in the active code, see ``Xmax``.

    Returns:
        Predictions of the trained model; in grid-search mode those of the
        last configuration.

    Raises:
        ValueError: If ``use_grid_search`` is neither 0 nor 1. Previously
            this ended in an ``UnboundLocalError`` at the return statement.
    """
    if use_grid_search not in (0, 1):
        raise ValueError(
            "use_grid_search must be 0 or 1, got {!r}".format(use_grid_search))

    #NOTE: Obtain the data and targets by heuristically setting p
    num_taps_rnn = 22

    # In case parameter tuning is not carried out
    if use_grid_search == 0:
        model = load_model_with_opts(options, model_type)
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=tau, p=num_taps_rnn)
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        predictions_rnn, _, _, _ = train_and_predict_RNN(
            model, options[model_type], xtrain, ytrain, ytest,
            tr_to_val_split=0.90, tr_verbose=True)

        # With test data the last column is the reference signal; without it
        # the (empty) ytest is passed through unchanged.
        if len(ytest) > 0:
            te_data_signal = ytest[:, -1]
        else:
            te_data_signal = ytest

        # Save prediction results in a txt file
        save_pred_results(output_file=output_file, predictions=predictions_rnn,
                          te_data_signal=te_data_signal)
        return predictions_rnn

    # ---- Grid search ---------------------------------------------------
    logfile = './param_selection/{}_gs_cycle_{}_logs_mbatch_diffos.txt'.format(model_type, predict_cycle_num)
    jsonfile = './param_selection/gsresults_{}_cycle{}_mbatch_diffos.json'.format(model_type, predict_cycle_num)

    gs_params = {"n_hidden": [40, 45, 50],
                 "n_layers": [1, 2],
                 "output_size": [1],
                 "num_epochs": [3000, 4000]
                 }

    # The original closed the JSON handle a second time instead of the log
    # file and did not restore sys.stdout on failure. The `with` block closes
    # the log and the `finally` always restores stdout.
    orig_stdout = sys.stdout
    with open(logfile, 'a') as log_fp:
        sys.stdout = log_fp
        try:
            gs_list_of_options = create_list_of_dicts(options=options,
                                                      model_type=model_type,
                                                      param_dict=gs_params)
            print("Grid Search to be carried over following {} configs:\n".format(len(gs_list_of_options)))
            val_errors_list = []

            for i, gs_option in enumerate(gs_list_of_options):
                print("Config:{} is \n{}".format(i+1, gs_option))

                # Load the model with the corresponding options
                model = RNN_model(
                    input_size=gs_option["input_size"],
                    output_size=gs_option["output_size"],
                    n_hidden=gs_option["n_hidden"],
                    n_layers=gs_option["n_layers"],
                    num_directions=gs_option["num_directions"],
                    model_type=gs_option["model_type"],
                    batch_first=gs_option["batch_first"],
                    lr=gs_option["lr"],
                    num_epochs=gs_option["num_epochs"],
                )

                # The target length follows the output size of this config.
                X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx,
                                                 tau=model.output_size, p=num_taps_rnn)
                xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

                predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(
                    model, gs_option, xtrain, ytrain, ytest,
                    tr_to_val_split=0.90, tr_verbose=True,
                    use_grid_search=use_grid_search)

                gs_option["Validation_Error"] = val_error
                gs_option["Training_Error"] = tr_error
                val_errors_list.append(gs_option)

                # Rewrite the results file after every config so that an
                # interrupted search still leaves the finished configs on
                # disk; the final file content is the complete list.
                with open(jsonfile, 'w') as json_fp:
                    json_fp.write(json.dumps(val_errors_list, cls=NDArrayEncoder, indent=2))
        finally:
            sys.stdout = orig_stdout

    return predictions_rnn
def train_model_RNN(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None, use_grid_search=0, Xmax=None, Xmin=None):
    """Train RNNs over repeated trials, or grid-search their hyper-parameters.

    With ``use_grid_search == 0`` the model is re-initialised and trained
    ``num_trials`` times on the same data and the flattened predictions of
    each trial are stacked row-wise. With ``use_grid_search == 1`` every
    configuration produced by ``create_list_of_dicts`` is trained and its
    training/validation errors are written to a JSON file under
    ``./param_selection/``; ``sys.stdout`` is redirected to a log file there
    while the search runs.

    Args:
        options: Nested options dict used to load the model(s).
        model_type: Key into ``options`` and selector for
            ``load_model_with_opts``.
        data: Time series forwarded to ``get_msah_training_dataset``.
        minimum_idx: Forwarded to ``get_msah_training_dataset`` as
            ``minimum_idx``.
        predict_cycle_num: Cycle selector forwarded to ``get_cycle``.
        tau: Target length used for the dataset in trial mode. In grid-search
            mode ``model.output_size`` is used instead.
        output_file: Unused by this function.
        use_grid_search: ``0`` for repeated trials, ``1`` for grid search.
        Xmax: Unused by this function.
        Xmin: Unused by this function.

    Returns:
        In trial mode an array of shape ``(num_trials, horizon)``; in
        grid-search mode the predictions of the last configuration.

    Raises:
        ValueError: If ``use_grid_search`` is neither 0 nor 1. Previously
            this ended in an ``UnboundLocalError`` at the return statement.
    """
    if use_grid_search not in (0, 1):
        raise ValueError(
            "use_grid_search must be 0 or 1, got {!r}".format(use_grid_search))

    # In case parameter tuning is not carried out
    if use_grid_search == 0:
        num_trials = 10
        # Row width used when the cycle has no test data.
        # NOTE(review): presumably the forecast length produced by
        # train_and_predict_RNN for such a cycle -- confirm.
        default_horizon = 132

        #NOTE: Obtain the data and targets by heuristically setting p
        num_taps_rnn = 22
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=tau, p=num_taps_rnn)
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        horizon = len(ytest) if len(ytest) > 0 else default_horizon
        predictions_rnn = np.zeros((num_trials, horizon))

        for t in range(num_trials):
            print("Trial no. {}".format(t + 1))
            # A fresh model per trial so that trials differ only by their
            # initialisation.
            model_t = load_model_with_opts(options, model_type)
            predictions_rnn_t, _, _, _ = train_and_predict_RNN(
                model_t, xtrain, ytrain, ytest,
                tr_to_val_split=0.90, tr_verbose=False)
            predictions_rnn[t, :] = predictions_rnn_t.flatten()

        return predictions_rnn

    # ---- Grid search ---------------------------------------------------
    logfile = './param_selection/{}_gs_cycle_{}_logs.txt'.format(
        model_type, predict_cycle_num)
    jsonfile = './param_selection/gsresults_{}_cycle{}.json'.format(
        model_type, predict_cycle_num)

    gs_params = {
        "n_hidden": [20, 30, 40, 50, 60],
        "output_size": [1, 5, 10],
        "num_epochs": [4000, 5000]
    }

    #NOTE: Obtain the data and targets by heuristically setting p
    num_taps_rnn = 32

    # The original closed the JSON handle a second time instead of the log
    # file and did not restore sys.stdout on failure. The `with` block closes
    # the log and the `finally` always restores stdout.
    orig_stdout = sys.stdout
    with open(logfile, 'w') as log_fp:
        sys.stdout = log_fp
        try:
            gs_list_of_options = create_list_of_dicts(options=options,
                                                      model_type=model_type,
                                                      param_dict=gs_params)
            print("Grid Search to be carried over following {} configs:\n".format(
                len(gs_list_of_options)))
            val_errors_list = []

            for i, gs_option in enumerate(gs_list_of_options):
                print("Config:{} is \n{}".format(i + 1, gs_option))

                # Load the model with the corresponding options
                model = RNN_model(
                    input_size=gs_option["input_size"],
                    output_size=gs_option["output_size"],
                    n_hidden=gs_option["n_hidden"],
                    n_layers=gs_option["n_layers"],
                    num_directions=gs_option["num_directions"],
                    model_type=gs_option["model_type"],
                    batch_first=gs_option["batch_first"],
                    lr=gs_option["lr"],
                    device=gs_option["device"],
                    num_epochs=gs_option["num_epochs"],
                )

                # The target length follows the output size of this config.
                X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx,
                                                 tau=model.output_size,
                                                 p=num_taps_rnn)
                xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

                predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(
                    model, xtrain, ytrain, ytest,
                    tr_to_val_split=0.90, tr_verbose=True,
                    use_grid_search=use_grid_search)

                gs_option["Validation_Error"] = val_error
                gs_option["Training_Error"] = tr_error
                val_errors_list.append(gs_option)

                # Rewrite the results file after every config so that an
                # interrupted search still leaves the finished configs on
                # disk; the final file content is the complete list.
                with open(jsonfile, 'w') as json_fp:
                    json_fp.write(json.dumps(val_errors_list, indent=2))
        finally:
            sys.stdout = orig_stdout

    return predictions_rnn