Esempio n. 1
    def _regularize_data(self, Xtrain, Xval, Xtest, Ytrain, Yval):
        """Internal method to scale the input/outputs of the DNN.

        It scales the inputs of the training, validation, and test datasets
        and the outputs of the training and validation datasets. The scaling
        methods are read from ``self.best_hyperparameters['scaleX']`` and
        ``self.best_hyperparameters['scaleY']``; when a value is not one of
        the supported method names the corresponding arrays are returned
        unchanged.

        As a side effect ``self.scaler`` is set to the scaler object returned
        by ``scaling`` for the outputs, or to ``None`` when the outputs are
        not scaled.

        Parameters
        ----------
        Xtrain : numpy.array
            Input of the training dataset
        Xval : numpy.array
            Input of the validation dataset
        Xtest : numpy.array
            Input of the test dataset
        Ytrain : numpy.array
            Output of the training dataset
        Yval : numpy.array
            Output of the validation dataset

        Returns
        -------
        tuple
            The five arrays ``(Xtrain, Xval, Xtest, Ytrain, Yval)``, scaled
            where required
        """

        # Method names for which scaling is applied; anything else means
        # "leave the data as it is"
        scaling_methods = ('Norm', 'Norm1', 'Std', 'Median', 'Invariant')

        scale_x = self.best_hyperparameters['scaleX']
        scale_y = self.best_hyperparameters['scaleY']

        # If required, inputs are scaled. The input scaler is discarded.
        if scale_x in scaling_methods:
            [Xtrain, Xval, Xtest], _ = scaling([Xtrain, Xval, Xtest], scale_x)

        # If required, outputs are scaled. The output scaler is kept on the
        # instance so that it can be reused after this call.
        if scale_y in scaling_methods:
            [Ytrain, Yval], self.scaler = scaling([Ytrain, Yval], scale_y)
        else:
            self.scaler = None

        return Xtrain, Xval, Xtest, Ytrain, Yval
Esempio n. 2
    def recalibrate(self, Xtrain, Ytrain):
        """Function to recalibrate the LEAR model.

        It uses a training (Xtrain, Ytrain) pair for recalibration. One LASSO
        model is estimated per hour of the day; the fitted models are stored
        in ``self.models`` (keyed by hour, 0 to 23) and the scalers used for
        the outputs and inputs are stored in ``self.scalerY`` and
        ``self.scalerX``.

        Parameters
        ----------
        Xtrain : numpy.array
            Input in training dataset. It should be of size *[n,m]* where *n* is the number of days
            in the training dataset and *m* the number of input features. The last 7 columns are
            treated as dummies and are not rescaled. Note that this array is modified in place:
            its non-dummy columns are overwritten with their scaled values.
        Ytrain : numpy.array
            Output in training dataset. It should be of size *[n,24]* where *n* is the number of days
            in the training dataset and 24 are the 24 prices of each day

        Returns
        -------
        None
            The method only updates ``self.models``, ``self.scalerX`` and ``self.scalerY``
        """

        # Applying Invariant, aka asinh-median transformation to the prices
        [Ytrain], self.scalerY = scaling([Ytrain], 'Invariant')

        # Rescaling all inputs except dummies (7 last features). The scaled
        # values are written back into the caller's array.
        [Xtrain_no_dummies], self.scalerX = scaling([Xtrain[:, :-7]], 'Invariant')
        Xtrain[:, :-7] = Xtrain_no_dummies

        self.models = {}
        for h in range(24):

            # Estimating lambda hyperparameter using LARS: the information
            # criterion model is fitted only to obtain its selected alpha
            param_model = LassoLarsIC(criterion='aic', max_iter=2500)
            param = param_model.fit(Xtrain, Ytrain[:, h]).alpha_

            # Re-calibrating LEAR using standard LASSO estimation technique
            model = Lasso(max_iter=2500, alpha=param)
            model.fit(Xtrain, Ytrain[:, h])

            self.models[h] = model
Esempio n. 3
def _hyperopt_objective(hyperparameters, trials, trials_file_path, max_evals, nlayers, dfTrain, dfTest, 
                        shuffle_train, dataset, data_augmentation, 
                        calibration_window, n_exogenous_inputs):
    """Function that defines the hyperparameter optimization objective/loss

    This function receives as input a set of hyperparameters, trains a DNN using them,
    and returns the performance of the DNN for the selected hyperparameters in a validation
    dataset. Before training, the current state of ``trials`` is saved to
    ``trials_file_path`` and, when a previous result exists, the best metrics so far
    are printed.

    Parameters
    ----------
    hyperparameters : dict
        A dictionary provided by hyperopt indicating whether each hyperparameter/feature is selected
    trials : hyperopt.Trials
        The trials object that stores the hyperparameter optimization runs
    trials_file_path : str
        The path to store the trials object
    max_evals : int
        Maximum number of iterations for hyperparameter optimization. Only used in the
        progress message
    nlayers : int
        Number of layers in the DNN model. Layers whose ``'neurons<k>'`` hyperparameter
        is below 50 are dropped
    dfTrain : pandas.DataFrame
        Dataframe containing the training data
    dfTest : pandas.DataFrame
        Dataframe containing the testing data
    shuffle_train : bool
        Boolean that selects whether the training and validation datasets are shuffled
    dataset : object
        Not referenced inside this function; kept in the signature for the callers
    data_augmentation : object
        Forwarded unchanged to ``_build_and_split_XYs``
    calibration_window : int or float
        Multiplier of a 52-week period that selects how much of the end of ``dfTrain``
        is used for training
    n_exogenous_inputs : object
        Forwarded unchanged to ``_build_and_split_XYs``

    Returns
    -------
    dict
        A dictionary summarizing the result of the hyperparameter run: the loss
        (validation MAE), MAE and sMAPE on the validation and test datasets, and the
        hyperopt status
    """

    # Method names for which scaling is applied
    scaling_methods = ('Norm', 'Norm1', 'Std', 'Median', 'Invariant')
    scale_x = hyperparameters['scaleX'] in scaling_methods
    scale_y = hyperparameters['scaleY'] in scaling_methods

    # Re-defining the training dataset based on the calibration window: only the
    # last `calibration_window` 52-week periods of dfTrain are kept.
    # NOTE(review): the one-hour offset assumes an hourly index, so that the window
    # holds a whole number of days - confirm against the data.
    window_start = (dfTrain.index[-1] - pd.Timedelta(weeks=52) * calibration_window
                    + pd.Timedelta(hours=1))
    dfTrain_cw = dfTrain.loc[window_start:]

    # Saving hyperoptimization state. The file is closed explicitly so that the
    # dump is flushed to disk before training starts.
    with open(trials_file_path, "wb") as trials_file:
        pc.dump(trials, trials_file)

    # Printing the best result so far, if at least one result is available
    if trials.losses()[0] is not None:

        MAEVal = trials.best_trial['result']['MAE Val']
        MAETest = trials.best_trial['result']['MAE Test']

        sMAPEVal = trials.best_trial['result']['sMAPE Val']
        sMAPETest = trials.best_trial['result']['sMAPE Test']

        print('\n\nTested {}/{} iterations.'.format(len(trials.losses()) - 1,
              max_evals))

        print('Best MAE - Validation Dataset')            
        print("  MAE: {:.1f} | sMAPE: {:.2f} %".format(MAEVal, sMAPEVal))
        print('\nBest MAE - Test Dataset')
        print("  MAE: {:.1f} | sMAPE: {:.2f} %".format(MAETest, sMAPETest))

    # Defining X,Y datasets
    Xtrain, Ytrain, Xval, Yval, Xtest, Ytest, indexTest = \
        _build_and_split_XYs(dfTrain=dfTrain_cw, dfTest=dfTest, features=hyperparameters, 
                          shuffle_train=shuffle_train, hyperoptimization=True,
                          data_augmentation=data_augmentation, n_exogenous_inputs=n_exogenous_inputs)

    # If required, datasets are scaled
    if scale_x:
        [Xtrain, Xval, Xtest], _ = scaling([Xtrain, Xval, Xtest], hyperparameters['scaleX'])

    if scale_y:
        [Ytrain, Yval], scaler = scaling([Ytrain, Yval], hyperparameters['scaleY'])
    else:
        scaler = None

    # Layers with fewer than 50 neurons are removed from the architecture
    neurons = [int(hyperparameters['neurons' + str(k)]) for k in range(1, nlayers + 1)
               if int(hyperparameters['neurons' + str(k)]) >= 50]

    # Initialize model
    forecaster = DNNModel(neurons=neurons, n_features=Xtrain.shape[-1], 
                     dropout=hyperparameters['dropout'], batch_normalization=hyperparameters['batch_normalization'], 
                     lr=hyperparameters['lr'], verbose=False,
                     optimizer='adam', activation=hyperparameters['activation'],
                     epochs_early_stopping=20, scaler=scaler, loss='mae',
                     initializer=hyperparameters['init'])

    # Train on the training dataset, using the validation dataset during fitting
    forecaster.fit(Xtrain, Ytrain, Xval, Yval)

    # Validation metrics, computed on the original (unscaled) output values
    Yp = forecaster.predict(Xval).squeeze()
    if scale_y:
        Yval = scaler.inverse_transform(Yval)
        Yp = scaler.inverse_transform(Yp)

    mae_validation = np.mean(MAE(Yval, Yp))
    smape_validation = np.mean(sMAPE(Yval, Yp)) * 100

    # Test metrics. Ytest was never scaled, so only the prediction is transformed back
    Yp = forecaster.predict(Xtest).squeeze()
    if scale_y:
        Yp = scaler.inverse_transform(Yp).squeeze()

    maeTest = np.mean(MAE(Ytest, Yp)) 
    smape_test = np.mean(sMAPE(Ytest, Yp)) * 100

    # The test dataset is returned for directly evaluating the models without recalibration
    # while performing hyperopt. However, the hyperparameter search is performed using a validation
    # dataset
    return_values = {'loss': mae_validation, 'MAE Val': mae_validation, 'MAE Test': maeTest,
                     'sMAPE Val': smape_validation, 'sMAPE Test': smape_test, 
                     'status': STATUS_OK}
    return return_values