Code Example #1
def train_model_ESN(options,
                    model_type,
                    data,
                    minimum_idx,
                    predict_cycle_num,
                    tau=1,
                    output_file=None):

    # Get the dataset of inputs and targets based on num_taps
    X, Y = get_msah_training_dataset(data,
                                     minimum_idx=minimum_idx,
                                     tau=1,
                                     p=1)
    # predict cycle index = entered predict cycle num - 1
    xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

    # Set the ESN tau and history_q before loading the model
    if predict_cycle_num == 23 or predict_cycle_num == 76:
        # Special-cased cycle indices: tau is fixed at 132 steps
        options["esn"]["tau"] = 132
    else:
        # Otherwise tau is tied to the length of the test cycle
        options["esn"]["tau"] = len(ytest) - 1
    options["esn"]["history_q"] = options["esn"]["tau"] + 1
    model = load_model_with_opts(options, model_type)

    # Concatenate the training cycles into a single array
    xtrain_ct = concat_data(xtrain, col=-1)

    # pred of q values
    predictions, te_data_signal, pred_indexes = train_and_predict_ESN(
        model, train_data=xtrain_ct, test_data=ytest)

    # Saving prediction results
    if len(ytest) > 0:
        save_pred_results(output_file=output_file,
                          predictions=predictions,
                          te_data_signal=te_data_signal)

    return predictions, ytest
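The helper functions used above (get_msah_training_dataset, get_cycle, load_model_with_opts, concat_data, train_and_predict_ESN, save_pred_results) belong to the surrounding project and are not shown here. As background for the technique itself, the snippet below is a minimal, self-contained sketch of an echo state network: a fixed random reservoir is driven by the input signal and only a linear readout is fitted by ridge regression. Every name and hyperparameter in it (reservoir size, spectral radius, ridge strength) is an illustrative assumption, not a value taken from this project.

import numpy as np

def esn_fit_predict(train_signal, n_predict, n_reservoir=200,
                    spectral_radius=0.9, ridge=1e-6, seed=0):
    """Minimal echo state network: drive a fixed random reservoir with the
    training signal, fit a linear readout by ridge regression, then run the
    network in free-running (generative) mode for n_predict steps."""
    rng = np.random.default_rng(seed)

    # Fixed random input and reservoir weights, rescaled to the target spectral radius
    W_in = rng.uniform(-0.5, 0.5, size=(n_reservoir, 1))
    W = rng.uniform(-0.5, 0.5, size=(n_reservoir, n_reservoir))
    W *= spectral_radius / np.max(np.abs(np.linalg.eigvals(W)))

    # Collect reservoir states while teacher-forcing the training signal
    states = np.zeros((len(train_signal) - 1, n_reservoir))
    x = np.zeros(n_reservoir)
    for t in range(len(train_signal) - 1):
        x = np.tanh(W_in[:, 0] * train_signal[t] + W @ x)
        states[t] = x

    # Ridge-regression readout mapping reservoir state -> next value
    targets = train_signal[1:]
    W_out = np.linalg.solve(states.T @ states + ridge * np.eye(n_reservoir),
                            states.T @ targets)

    # Free-running prediction: feed the network its own output
    preds = np.zeros(n_predict)
    u = train_signal[-1]
    for t in range(n_predict):
        x = np.tanh(W_in[:, 0] * u + W @ x)
        u = x @ W_out
        preds[t] = u
    return preds

# Toy usage on a noisy sine wave (purely illustrative)
signal = np.sin(np.linspace(0, 20 * np.pi, 1000)) + 0.01 * np.random.randn(1000)
future = esn_fit_predict(signal, n_predict=100)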
Code Example #2
def grid_search_AR_single_cycle(data, solar_indices, model_type, options, params,
                                predict_cycle_num):

    params_dict_list_all = create_list_of_dicts(options=options,
                                                model_type=model_type,
                                                param_dict=params)
    val_errors = np.zeros((1, len(params["num_taps"])))
    training_errors = np.zeros((1, len(params["num_taps"])))
    test_errors = np.zeros((1, len(params["num_taps"])))
    #prediction_array = []

    for i, param_options in enumerate(params_dict_list_all):

        p = param_options["num_taps"]
        assert p > 0, "Invalid order specified as parameter"
        print("Parameter set used:\n{}".format(param_options))
        
        X, Y = get_msah_training_dataset(data, minimum_idx=solar_indices,
                                         tau=options[model_type]["output_size"], p=p)
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)
        options[model_type]["num_taps"] = p
        model = load_model_with_opts(options, model_type)
        predictions, test_error, val_error, tr_error = train_and_predict_AR(
            model, xtrain, ytrain, ytest)

        val_errors[:, i] = val_error
        training_errors[:, i] = tr_error
        test_errors[:, i] = test_error

    return training_errors, val_errors, test_errors
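Here again get_msah_training_dataset, get_cycle, load_model_with_opts, and train_and_predict_AR are project helpers. To make the idea of the grid search concrete, the following self-contained sketch fits an AR(p) model by ordinary least squares for each candidate order and records the validation MSE on a held-out tail of the series; the split ratio, candidate orders, and toy signal are assumptions for illustration only.

import numpy as np

def ar_design(signal, p):
    """Build the lagged design matrix and targets for an AR(p) least-squares fit."""
    X = np.column_stack([signal[i:len(signal) - p + i] for i in range(p)])
    y = signal[p:]
    return X, y

def grid_search_ar_order(signal, candidate_orders, val_split=0.9):
    """Fit AR(p) by ordinary least squares for each candidate order and
    return the validation MSE per order."""
    n_train = int(val_split * len(signal))
    val_errors = {}
    for p in candidate_orders:
        X, y = ar_design(signal[:n_train], p)
        coeffs, *_ = np.linalg.lstsq(X, y, rcond=None)

        # One-step-ahead predictions on the held-out tail
        X_val, y_val = ar_design(signal[n_train - p:], p)
        val_errors[p] = float(np.mean((X_val @ coeffs - y_val) ** 2))
    return val_errors

# Toy usage (illustrative only)
signal = np.sin(np.linspace(0, 12 * np.pi, 600)) + 0.05 * np.random.randn(600)
errors = grid_search_ar_order(signal, candidate_orders=range(2, 20))
best_p = min(errors, key=errors.get)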
Code Example #3
def train_model_AR(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None, use_grid_search=0):

    # Load the model with corresponding options
    if use_grid_search == 0:
        
        model = load_model_with_opts(options, model_type)
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau=options[model_type]["output_size"], p=options[model_type]["num_taps"])
        # predict cycle index = entered predict cycle num - 1
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)
        # pred of q values
        predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(model, xtrain, ytrain, ytest, tr_to_val_split=0.9, tr_verbose=True)
        
        # Save prediction results in a txt file
        if len(ytest) > 0:
            plot_predictions(predictions=predictions_ar, ytest=ytest,
                             title="AR model predictions with {} taps for cycle index {}".format(
                                 options[model_type]["num_taps"], predict_cycle_num))
            save_pred_results(output_file=output_file, predictions=predictions_ar, te_data_signal=ytest[:,-1])

    elif use_grid_search == 1:
        
        logfile = './param_selection/{}_gs_cycle_{}_1_logs.txt'.format(model_type, predict_cycle_num)
        
        orig_stdout = sys.stdout
        f_tmp = open(logfile, 'w')
        sys.stdout = f_tmp

        Error_dict = {}
        test_predictions = []
        test_error_optimal = []
        #nval = 1
        num_total_cycles = len(np.diff(minimum_idx))
        #predict_cycle_num_array = list(np.arange(num_total_cycles-nval, num_total_cycles))
        predict_cycle_num_array = [predict_cycle_num]
        params = {"num_taps":list(np.arange(10, 40, 1))} # For Dynamo
        #params = {"num_taps":list(np.arange(5, 50, 2))} # For Solar
        #TODO: Fix array nature of optimal_num_taps_all
        optimal_num_taps_all, training_errors_all, val_errors_all, test_errors_all = grid_search_AR_all_cycles(data=data,
            solar_indices=minimum_idx, model_type=model_type, options=options, params=params, predict_cycle_num_array=predict_cycle_num_array)
        
        Error_dict["validation_errors_with_taps"] = [(float(params["num_taps"][i]), *val_errors_all[:,i]) 
            for i in range(val_errors_all.shape[1])]

        plt.figure()
        plt.plot(params["num_taps"], val_errors_all[0], label="Validation MSE")
        plt.ylabel("MSE")
        plt.xlabel("Number of taps")
        plt.legend()
        plt.title("Error (MSE) vs number of taps")
        plt.show()

        if type(optimal_num_taps_all) != list:
            optimal_num_taps_all = [optimal_num_taps_all]

        Error_dict["optimal_num_taps"] = [float(*optimal_num_taps_all)] #NOTE: Object of int64 is not json serializable

        # Retrain the model again with the optimal value
        for i, optimal_num_taps in enumerate(optimal_num_taps_all):
            
            options[model_type]["num_taps"] = optimal_num_taps
            model = load_model_with_opts(options, model_type)
            X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx,tau=options[model_type]["output_size"], p=optimal_num_taps)
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num_array[i])
            # pred of q values
            predictions_ar, test_error, val_error, tr_error = train_and_predict_AR(model, xtrain, ytrain, ytest, 
                tr_to_val_split=0.90, tr_verbose=True)
            test_predictions.append(predictions_ar.tolist())
            if len(ytest) > 0:
                
                plot_predictions(predictions=predictions_ar, ytest=ytest, title="AR model predictions with {} taps for cycle index {}".format(
                    optimal_num_taps, predict_cycle_num_array[i]))
                test_error_optimal.append(test_error)
            
            else:
                
                plot_future_predictions(data=data, minimum_idx=minimum_idx, ytrain=ytrain,
                                        predictions=predictions_ar, title='Plot of original timeseries and future predictions for AR model')

        Error_dict["Test_predictions"] = test_predictions
        if len(test_error_optimal) > 0:
            Error_dict["Test_error"] = [test_error_optimal]
        else:
            Error_dict["Test_error"] = []

        with open('./param_selection/gsresults_{}_cycle{}_1.json'.format(model_type, predict_cycle_num_array[i]), 'w+') as fp:
            json.dump(Error_dict, fp, indent=2)

        # Restore stdout and close the log file
        sys.stdout = orig_stdout
        f_tmp.close()

        # Saving result files properly
        if len(ytest) > 0:
            save_pred_results(output_file=output_file, predictions=predictions_ar, te_data_signal=ytest[:,-1])

    return predictions_ar
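A fragile detail in the grid-search branch above is the manual sys.stdout swap: if an exception is raised before the restore, stdout stays pointed at the log file. A more robust pattern, sketched below on the assumption that the logging only needs to wrap the grid-search section, scopes the redirection with contextlib.redirect_stdout so it is always undone; the path used here is hypothetical.

import contextlib

logfile = './param_selection/example_gs_logs.txt'  # hypothetical path for illustration

with open(logfile, 'w') as f_log, contextlib.redirect_stdout(f_log):
    # Everything printed inside this block goes to the log file;
    # sys.stdout is restored automatically when the block exits,
    # even if an exception is raised.
    print("Parameter set used: ...")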
Code Example #4
def train_model_RNN(options, model_type, data, minimum_idx, predict_cycle_num, tau=1, output_file=None, use_grid_search=0, Xmax=None, Xmin=None):
    
    #tau_chosen = 1 #Usual case
    #tau_chosen = options[model_type]["output_size"]
    #print("Tau chosen {}".format(tau_chosen))

    # In case parameter tuning is not carried out
    if use_grid_search == 0:

        # Load the model with the corresponding options
        model = load_model_with_opts(options, model_type)
        
        #NOTE: Obtain the data and targets by heuristically setting p
        num_taps_rnn = 22
        
        X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau = tau, p=num_taps_rnn)

        # Get xtrain, ytrain, ytest
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        # Pred of q values
        predictions_rnn, test_error, val_error, tr_error = train_and_predict_RNN(model, options[model_type], xtrain, ytrain, ytest, 
                                                                                tr_to_val_split=0.90, tr_verbose=True)
        if len(ytest) > 0:
            
            # Normalized predictions in [0, 1]
            #plot_predictions(predictions=predictions_rnn, ytest=ytest, title="{} model predictions with {} taps for cycle index {}".format(
            #    model_type, num_taps_rnn, predict_cycle_num))
            
            # Unnormalized predictions in original scale
            #ytest_un = np.copy(ytest)
            #ytest_un[:,-1] = unnormalize(ytest[:,-1], Xmax, Xmin)
            #plot_predictions(predictions=unnormalize(predictions_rnn, Xmax, Xmin), ytest=ytest_un, title="{} model predictions (unnormalized) with {} taps for cycle index {}".format(
            #    model_type, num_taps_rnn, predict_cycle_num))
            
            # Save prediction results in a txt file
            save_pred_results(output_file=output_file, predictions=predictions_rnn, te_data_signal=ytest[:,-1])
        else:
            
            #plot_future_predictions(data=data, minimum_idx=minimum_idx, ytrain=ytrain, predictions=predictions_rnn,
            #title="Plot of original timeseries and future predictions for {} for cycle index {}".format(
            #    model_type, predict_cycle_num))
            
            #plot_future_predictions(data=unnormalized_data, minimum_idx=minimum_idx, ytrain=ytrain, predictions=unnormalize(predictions_rnn, Xmax, Xmin),
            #title="Plot of original unnormalized timeseries and future predictions for {} for cycle index {}".format(
            #    model_type, predict_cycle_num))
            
            # Save prediction results in a txt file
            save_pred_results(output_file=output_file, predictions=predictions_rnn, te_data_signal=ytest)

    elif use_grid_search == 1:
        
        logfile = './param_selection/{}_gs_cycle_{}_logs_mbatch_diffos.txt'.format(model_type, predict_cycle_num)
        jsonfile = './param_selection/gsresults_{}_cycle{}_mbatch_diffos.json'.format(model_type, predict_cycle_num)

        orig_stdout = sys.stdout
        f_tmp = open(logfile, 'a')
        sys.stdout = f_tmp
        #gs_params = {"n_hidden":[20, 30, 40, 50, 60],
        #             "output_size":[1,5,10],
        #             "num_epochs":[4000]
        #            }
        
        gs_params = {"n_hidden":[40, 45, 50],
                     "n_layers":[1, 2],
                     "output_size":[1],
                     "num_epochs":[3000, 4000]
                    }
        '''
        gs_params = {"n_hidden":[20, 30, 40, 50, 60],
                "n_layers":[1],
                "output_size":[1],
                "num_epochs":[500]
                }
        '''                                                                                           

        gs_list_of_options = create_list_of_dicts(options=options,
                                                model_type=model_type,
                                                param_dict=gs_params)
        
        print("Grid Search to be carried over following {} configs:\n".format(len(gs_list_of_options)))
        val_errors_list = []

        for i, gs_option in enumerate(gs_list_of_options):
            
            print("Config:{} is \n{}".format(i+1, gs_option))
            # Load the model with the corresponding options
            model = RNN_model(
                    input_size=gs_option["input_size"],
                    output_size=gs_option["output_size"],
                    n_hidden=gs_option["n_hidden"],
                    n_layers=gs_option["n_layers"],
                    num_directions=gs_option["num_directions"],
                    model_type=gs_option["model_type"],
                    batch_first=gs_option["batch_first"],
                    lr=gs_option["lr"],
                    num_epochs=gs_option["num_epochs"],
                )
            
            #NOTE: Obtain the data and targets by heuristically setting p
            num_taps_rnn = 22

            X, Y = get_msah_training_dataset(data, minimum_idx=minimum_idx, tau = model.output_size, p=num_taps_rnn)

            # Get xtrain, ytrain, ytest
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            # Pred of q values
            predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(model, gs_option,
                                                                            xtrain, ytrain, ytest, 
                                                                            tr_to_val_split=0.90, 
                                                                            tr_verbose=True,
                                                                            use_grid_search=use_grid_search)
            gs_option["Validation_Error"] = val_error
            gs_option["Training_Error"] = tr_error

            val_errors_list.append(gs_option)
            
        with open(jsonfile, 'w') as f:
            f.write(json.dumps(val_errors_list, cls=NDArrayEncoder, indent=2))

        sys.stdout = orig_stdout
        f_tmp.close()

    return predictions_rnn
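The NDArrayEncoder passed to json.dumps above is defined elsewhere in the project. For readers following along, a minimal sketch of what such an encoder typically looks like is given below; this class body is an assumption, not the project's actual implementation.

import json
import numpy as np

class NDArrayEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy arrays and scalars into plain Python types."""
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            return obj.item()
        return super().default(obj)

# Usage: json.dumps({"val_error": np.float64(0.12), "preds": np.arange(3)}, cls=NDArrayEncoder)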
Code Example #5
def train_model_RNN(options,
                    model_type,
                    data,
                    minimum_idx,
                    predict_cycle_num,
                    tau=1,
                    output_file=None,
                    use_grid_search=0,
                    Xmax=None,
                    Xmin=None):

    #tau_chosen = 1 #Usual case
    #tau_chosen = options[model_type]["output_size"]
    #print("Tau chosen {}".format(tau_chosen))

    # In case parameter tuning is not carried out
    if use_grid_search == 0:

        # Number of independent trials; a fresh model is loaded inside the loop below
        num_trials = 10

        #NOTE: Obtain the data and targets by heuristically setting p
        num_taps_rnn = 22

        X, Y = get_msah_training_dataset(data,
                                         minimum_idx=minimum_idx,
                                         tau=tau,
                                         p=num_taps_rnn)
        # Get xtrain, ytrain, ytest
        xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

        if len(ytest) > 0:
            predictions_rnn = np.zeros((num_trials, len(ytest)))
        else:
            predictions_rnn = np.zeros((num_trials, 132))

        for t in range(num_trials):

            print("Trial no. {}".format(t + 1))
            model_t = load_model_with_opts(options, model_type)
            # Pred of q values
            predictions_rnn_t, test_error, val_error, tr_error = train_and_predict_RNN(
                model_t,
                xtrain,
                ytrain,
                ytest,
                tr_to_val_split=0.90,
                tr_verbose=False)
            predictions_rnn[t, :] = predictions_rnn_t.flatten()

    elif use_grid_search == 1:

        logfile = './param_selection/{}_gs_cycle_{}_logs.txt'.format(
            model_type, predict_cycle_num)
        jsonfile = './param_selection/gsresults_{}_cycle{}.json'.format(
            model_type, predict_cycle_num)

        orig_stdout = sys.stdout
        f_tmp = open(logfile, 'w')
        sys.stdout = f_tmp
        #gs_params = {"n_hidden":[20, 30, 40, 50, 60],
        #             "output_size":[1,5,10],
        #             "num_epochs":[4000]
        #            }

        gs_params = {
            "n_hidden": [20, 30, 40, 50, 60],
            "output_size": [1, 5, 10],
            "num_epochs": [4000, 5000]
        }

        gs_list_of_options = create_list_of_dicts(options=options,
                                                  model_type=model_type,
                                                  param_dict=gs_params)

        print("Grid Search to be carried over following {} configs:\n".format(
            len(gs_list_of_options)))
        val_errors_list = []

        for i, gs_option in enumerate(gs_list_of_options):

            print("Config:{} is \n{}".format(i + 1, gs_option))
            # Load the model with the corresponding options
            model = RNN_model(
                input_size=gs_option["input_size"],
                output_size=gs_option["output_size"],
                n_hidden=gs_option["n_hidden"],
                n_layers=gs_option["n_layers"],
                num_directions=gs_option["num_directions"],
                model_type=gs_option["model_type"],
                batch_first=gs_option["batch_first"],
                lr=gs_option["lr"],
                device=gs_option["device"],
                num_epochs=gs_option["num_epochs"],
            )

            #NOTE: Obtain the data and targets by heuristically setting p
            #num_taps_rnn = 22
            num_taps_rnn = 32

            X, Y = get_msah_training_dataset(data,
                                             minimum_idx=minimum_idx,
                                             tau=model.output_size,
                                             p=num_taps_rnn)

            # Get xtrain, ytrain, ytest
            xtrain, ytrain, ytest = get_cycle(X, Y, icycle=predict_cycle_num)

            # Pred of q values
            predictions_rnn, _, val_error, tr_error = train_and_predict_RNN(
                model,
                xtrain,
                ytrain,
                ytest,
                tr_to_val_split=0.90,
                tr_verbose=True,
                use_grid_search=use_grid_search)
            gs_option["Validation_Error"] = val_error
            gs_option["Training_Error"] = tr_error

            val_errors_list.append(gs_option)

        with open(jsonfile, 'w') as f:
            f.write(json.dumps(val_errors_list, indent=2))

        sys.stdout = orig_stdout
        f_tmp.close()

    return predictions_rnn
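In the non-grid-search branch this version stacks one prediction per trial into a (num_trials, horizon) array. A small, self-contained sketch of how such an ensemble of trials might be summarized into a mean trajectory with a pointwise spread is given below; the +/- 2 standard deviation band is a convention assumed here, not something taken from the project.

import numpy as np

def summarize_trials(predictions_rnn):
    """Summarize per-trial predictions of shape (num_trials, horizon) into a
    mean trajectory and a pointwise +/- 2 standard deviation band."""
    mean_pred = predictions_rnn.mean(axis=0)
    std_pred = predictions_rnn.std(axis=0)
    return mean_pred, mean_pred - 2 * std_pred, mean_pred + 2 * std_pred

# Toy usage with random "trials" (illustrative only)
fake_trials = np.random.randn(10, 132).cumsum(axis=1)
mean_pred, lower, upper = summarize_trials(fake_trials)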