    # Combine the ensemble predictions into a single prediction for each trial
    # of each hyperparameter setting (see the voting helper sketch after this example).
    predictions = []
    for result in results:
        predictor_results = result[2]  # M predictors, each a list of (m_ij, s_ij) pairs, one per trial
        if SOFTVOTING:
            bagging_predictions = trials_soft_prediction(
                predictor_results, Config.trials)
        else:
            bagging_predictions = trials_hard_prediction(
                predictor_results, Config.trials)

        predictions.append([result[0], result[1], bagging_predictions])

    ## compute the normalized error for all hyperparam runs & all trials
    V_0 = generate_V_0(100000, Config.Delta, Config.d)  # large sample for a more accurate V_0 estimate
    logger.info(f"V_0 = {V_0}")
    normalized_error_results = []
    for result in predictions:
        errors = []
        for index, prediction in enumerate(result[2]):
            error = normalized_error_VT(prediction,
                                        DataContainer.y_test_list[index], V_0)
            errors.append(error)
        normalized_error_results.append([result[0], result[1], errors])

    print(normalized_error_results)
    if WRITEBACK:
        write_results("mpi_regularized_bagging", normalized_error_results,
                      Config)
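
# The helpers trials_soft_prediction / trials_hard_prediction are not shown in
# this excerpt. A minimal sketch, assuming soft voting takes an inverse-variance
# weighted average of the predictor means (m_ij) while hard voting averages the
# means unweighted; the actual project implementation may differ.
import numpy as np

def trials_soft_prediction(predictor_results, trials):
    # predictor_results: M predictors, each a list of (m_ij, s_ij) per trial
    combined = []
    for j in range(trials):
        means = np.array([p[j][0] for p in predictor_results])
        stds = np.array([p[j][1] for p in predictor_results])
        weights = 1.0 / (stds ** 2 + 1e-12)        # inverse-variance weights
        combined.append((weights * means).sum(axis=0) / weights.sum(axis=0))
    return combined

def trials_hard_prediction(predictor_results, trials):
    # unweighted average of the predictor means for every trial
    return [np.mean([p[j][0] for p in predictor_results], axis=0)
            for j in range(trials)]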
# Example 2
    """

    ## generate Training set, Test set & V_0s
    X_train, y_train = generate_train_set(Config.N_train, Config.Delta,
                                          Config.d)
    X_test, y_test = generate_test_set(Config.N_test, Config.Delta, Config.d)

    V_0_train = generate_V_0(Config.N_train, Config.Delta, Config.d)
    V_0_test = generate_V_0(Config.N_test, Config.Delta, Config.d)

    logger.info(f"V_0_test = {V_0_test}")

    reference = create_GPR(Config.N_train)
    reference.fit(X_train, y_train)
    f_X = reference.predict(X_test)
    reference_error = normalized_error_VT(f_X, y_test, V_0_test)
    logger.info(f"reference error : {reference_error}")

    ## MPI execute
    results = []

    with MPIPoolExecutor() as executor:
        futures = []
        # evaluate the model for every grid point by submitting a task to an MPI worker
        for r in Config.Ratios:
            logger.info(f"starting evaluation for ratio {r}")
            future = executor.submit(evaluate_boosting,
                                     X_train.copy(),
                                     y_train.copy(),
                                     X_test.copy(),
                                     y_test.copy(),
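
# A standalone sketch of the mpi4py.futures submit/gather pattern this example
# relies on; `worker` and `tasks` are hypothetical names, not part of the
# original code.
from mpi4py.futures import MPIPoolExecutor

def run_on_mpi_pool(worker, tasks):
    # tasks: list of argument tuples; each is dispatched to an MPI worker process
    with MPIPoolExecutor() as executor:
        futures = [executor.submit(worker, *args) for args in tasks]
        # result() blocks until the corresponding worker finishes
        return [future.result() for future in futures]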
# Example 3
def evaluate_model(base_model,
                   hyperparams,
                   X_train,
                   y_train,
                   d,
                   DeltaT,
                   trials,
                   N_test,
                   samples_generator,
                   V_0=None,
                   logger=None,
                   random_seeds=None):
    """
    Sets the given hyperparameters on the model, trains it, and then evaluates it.
    Used in grid-search settings.

    :param base_model: model to use
    :type base_model: sklearn model
    :param hyperparams: dict of hyperparameters, containing their exact name as key 
    :type hyperparams: dict{str: value}
    :param X_train: Training set
    :type X_train: np.ndarray(N x (d*T))
    :param y_train: labels of training set
    :type y_train: np.ndarray( N x 1 )
    :param d: number of stocks 
    :type d: integer
    :param DeltaT: list of the delta Ts between the driver moments
    :type DeltaT: list of length T
    :param trials: number of times to evaluate the model
    :type trials: integer
    :param N_test: number of samples for each evaluation
    :type N_test: integer
    :param samples_generator: stochastic generator used to generate test samples 
    :type samples_generator: StochasticModelBase
    :param V_0: V_0 value; can be computed from a larger sample set to improve the accuracy of the results
    :type V_0: optional float
    :param logger: Optional, logger for debug information 
    :param random_seeds: Optional, list of seeds (one per trial),
                so that the same seeds are used to generate the test sets across multiple instances of the grid search
    :return: list containing the normalized errors
    :rtype: list of size trials
    """

    errors = []
    try:
        if logger:
            logger.debug(
                f" {hyperparams} -> thread id = {threading.current_thread().ident}"
            )
        else:
            print(
                f" {hyperparams} -> thread id = {threading.current_thread().ident}"
            )
        # create the model and set the hyperparams
        model = clone(base_model)
        for k, v in hyperparams.items():
            setattr(model, k, v)
        # train the model
        model.fit(X_train, y_train)

        # evaluate the model
        for trial in range(trials):
            if random_seeds:
                np.random.seed(random_seeds[trial])
            s_test = samples_generator(N_test, d, DeltaT)
            s_test.generate_samples()
            y_test = s_test.y
            X_test = s_test.X
            S_test = s_test.S

            Flattened_X_test = flatten_X(X_test)

            V_T = y_test
            if V_0 is None:
                V_0 = s_test.generate_true_V(0)
                V_0 = V_0.mean()
            y_hat = model.predict(Flattened_X_test)

            error = normalized_error_VT(y_hat, V_T, V_0).item()
            if logger:
                logger.debug(f"{hyperparams} , {trial} -> {error}")
            else:
                print(f"{hyperparams} , {trial} -> {error}")
            # add normalized error to the list
            errors.append(error)
        if logger:
            logger.info(f"{hyperparams} -> {errors}")
        else:
            print(f"{hyperparams} -> {errors}")
        return errors

    except Exception:
        if logger:
            logger.warning(traceback.format_exc())
        else:
            traceback.print_exc()
        return None
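
# A minimal grid-search sketch showing how evaluate_model might be called. The
# GaussianProcessRegressor base model, the alpha grid, and `samples_generator`
# (any StochasticModelBase subclass from this project) are illustrative
# assumptions, not part of the original module.
from sklearn.gaussian_process import GaussianProcessRegressor

def grid_search_example(X_train, y_train, samples_generator, d, DeltaT):
    base_model = GaussianProcessRegressor()
    results = {}
    for alpha in (1e-10, 1e-6, 1e-2):                  # hypothetical grid
        errors = evaluate_model(base_model,
                                {"alpha": alpha},
                                X_train, y_train,
                                d, DeltaT,
                                trials=5,
                                N_test=1000,
                                samples_generator=samples_generator)
        results[alpha] = errors                        # list of normalized errors, or None on failure
    return results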