def test_multi_target_sparse_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix, sp.dok_matrix, sp.lil_matrix]:
        rgr = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))

        rgr.fit(X_train, y_train)
        rgr_sparse.fit(sparse(X_train), y_train)

        assert_almost_equal(rgr.predict(X_test), rgr_sparse.predict(sparse(X_test)))
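The imports this test assumes are not shown on this page; a minimal set inferred from the names used (assert_almost_equal is numpy.testing's, which scikit-learn's own test utilities re-export):

import scipy.sparse as sp
from numpy.testing import assert_almost_equal
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.multioutput import MultiOutputRegressor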
Example 2
def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr_w.partial_fit(X, y, w)

    # weighted with different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr.partial_fit(X, y, w)

    assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0])
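Because the wrapper delegates to the estimator's partial_fit, it also supports incremental, out-of-core training; a minimal sketch, assuming a hypothetical `batches` iterable of (X, y) mini-batches:

rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
for X_batch, y_batch in batches:  # hypothetical iterable of mini-batches
    rgr.partial_fit(X_batch, y_batch)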
Example 3
def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))
Example 4
def train_knn_regressor_model(n_neighbors, weights, algorithm,
                              training_examples, training_targets, model_dir):

    neigh_reg = KNeighborsRegressor(n_neighbors=n_neighbors,
                                    weights=weights,
                                    algorithm=algorithm)
    regressor = MultiOutputRegressor(neigh_reg)
    regressor.fit(training_examples, training_targets)
    training_predictions = regressor.predict(training_examples)

    r2_score = regressor.score(training_examples, training_targets)
    print("R^2 score (on training data): %0.3f " % r2_score)

    rmse = math.sqrt(
        metrics.mean_squared_error(training_predictions, training_targets))
    print("Final RMSE (on training data): %0.3f" % rmse)

    with open(model_dir + "/results.txt", "w") as res_file:
        res_file.write("### Results of the regression ###\n")
        res_file.write("R^2 score (on training data): %0.3f\n" % r2_score)
        res_file.write("Final RMSE (on training data): %0.3f\n" % rmse)

    joblib.dump(regressor, model_dir + "/knn_reg.joblib")

    return regressor, r2_score
Example 5
def train_svm_regressor_model(kernel, gamma, coeff, degree, epsilon,
                              training_examples, training_targets, model_dir):

    svr = svm.SVR(kernel=kernel,
                  gamma=gamma,
                  C=coeff,
                  degree=degree,
                  epsilon=epsilon)
    regressor = MultiOutputRegressor(svr)
    regressor.fit(training_examples, training_targets)
    training_predictions = regressor.predict(training_examples)

    r2_score = regressor.score(training_examples, training_targets)
    print("R^2 score (on training data): %0.3f " % r2_score)

    rmse = math.sqrt(
        metrics.mean_squared_error(training_predictions, training_targets))
    print("Final RMSE (on training data): %0.3f" % rmse)

    with open(model_dir + "/results.txt", "w") as res_file:
        res_file.write("### Results of the regression ###\n")
        res_file.write("R^2 score (on training data): %0.3f\n" % r2_score)
        res_file.write("Final RMSE (on training data): %0.3f\n" % rmse)

    joblib.dump(regressor, model_dir + "/svm_reg.joblib")

    return regressor, r2_score
Example 6
    def test_multioutput(self):

        # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py

        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.ensemble import RandomForestRegressor

        # Create a random dataset
        rng = np.random.RandomState(1)
        X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
        y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
        y += (0.5 - rng.rand(*y.shape))

        df = pdml.ModelFrame(X, target=y)

        max_depth = 30

        rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                                random_state=self.random_state)
        reg1 = df.multioutput.MultiOutputRegressor(rf1)

        rf2 = RandomForestRegressor(max_depth=max_depth,
                                    random_state=self.random_state)
        reg2 = MultiOutputRegressor(rf2)

        df.fit(reg1)
        reg2.fit(X, y)

        result = df.predict(reg2)
        expected = pd.DataFrame(reg2.predict(X))
        tm.assert_frame_equal(result, expected)
Example 8
class Solver:
    def __init__(self, func, scopes):
        self.func = func
        self.scopes = np.array(scopes)

        self.model = None

    def train(self, epochs=1e3, verbose=False):
        self.model = MultiOutputRegressor(
            MLPRegressor(solver='lbfgs',
                         alpha=1e-5,
                         hidden_layer_sizes=(100, 30),
                         random_state=1))

        n_variables = len(self.scopes)
        xmin = self.scopes[:, 0]
        xmax = self.scopes[:, 1]

        Xs = list()
        Ys = list()
        if verbose:
            print("Generating training data...", end="")
        for i in range(int(epochs)):
            x = xmin + (xmax - xmin) * np.random.random(n_variables)
            Xs.append(self.func(x))
            Ys.append(x)
            if (i + 1) % int(epochs / 10) == 0 and verbose:
                print(" {value:0.0f}% ".format(value=(i + 1) / int(epochs) *
                                               100),
                      end="")

        if verbose:
            print("Complete!")
        #Xs = np.array(Xs)
        #Ys = np.array(Ys)
        if verbose:
            print("Training model...", end='')
        self.model.fit(Xs, Ys)
        if verbose:
            print("End with R^2: {value:0.4f}".format(
                value=self.model.score(Xs, Ys)))

    def evaluate(self, bs):
        return self.model.predict(bs)

    def evaluate_single(self, b):
        return self.model.predict([b])[0]
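The Solver class above learns an inverse mapping: it samples x uniformly within the given scopes, evaluates func(x), and trains the network to predict x back from func(x). A usage sketch with a hypothetical two-variable function (assuming the numpy/sklearn imports the class itself relies on):

import numpy as np

solver = Solver(func=lambda x: np.array([x[0] + x[1], x[0] * x[1]]),
                scopes=[(0.0, 1.0), (0.0, 1.0)])
solver.train(epochs=1e3, verbose=True)
print(solver.evaluate_single([1.2, 0.35]))  # approximate x with func(x) ~= [1.2, 0.35]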
Example 9
def multi_output_regression(train, test, grid, outputs):

    # Multi-Layer Perceptron Regressor
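    # NOTE: `pd` here is evidently a project-local helper module exposing
    # training_testing_data(), not pandas.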
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_mlp = MultiOutputRegressor(
        MLPRegressor(solver='adam',
                     learning_rate='adaptive',
                     max_iter=500,
                     early_stopping=True))
    multi_output_mlp.fit(input_train, output_train)
    prediction_mlp = multi_output_mlp.predict(input_test)
    print('Multi-Layer Perceptron')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_mlp)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_mlp)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_mlp))))

    # Gradient Boosting Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_gbr = MultiOutputRegressor(
        GradientBoostingRegressor(loss='huber'))
    multi_output_gbr.fit(input_train, output_train)
    prediction_gbr = multi_output_gbr.predict(input_test)
    print('Gradient Boosting Regressor')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_gbr)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_gbr)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_gbr))))

    # Random Forest Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_rfr = MultiOutputRegressor(RandomForestRegressor())
    multi_output_rfr.fit(input_train, output_train)
    prediction_rfr = multi_output_rfr.predict(input_test)
    print('Random Forest Regressor')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_rfr)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_rfr)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_rfr))))

    return actual, prediction_gbr, prediction_mlp, prediction_rfr
Example 11
def multir(request, model):
    bolsa = pd.read_csv("app/data/bolsa.csv",
                        index_col='Date').groupby('Codigo')
    lista = [
        'B3SA3', 'BBDC4', 'BRAP4', 'BRFS3', 'BRKM5', 'BRML3', 'BTOW3', 'CCRO3',
        'CIEL3', 'CMIG4', 'CSAN3', 'CSNA3', 'CYRE3', 'ECOR3', 'EGIE3', 'ELET3',
        'ELET6', 'EMBR3', 'ENBR3', 'EQTL3', 'ESTC3', 'FLRY3', 'GGBR4', 'GOAU4',
        'GOLL4', 'HYPE3', 'IGTA3', 'KROT3', 'ITSA4', 'ITUB4', 'LAME4', 'LREN3',
        'MGLU3', 'MRFG3', 'MRVE3', 'MULT3', 'NATU3', 'PCAR4', 'PETR3', 'PETR4',
        'QUAL3', 'RADL3', 'RENT3', 'SANB11', 'SBSP3', 'TAEE11', 'TIMP3',
        'UGPA3', 'USIM5', 'VALE3', 'VIVT4', 'WEGE3'
    ]

    resultado = []
    for item in lista:
        bolsa = pd.read_csv("app/data/bolsa.csv",
                            index_col='Date').groupby('Codigo')
        dados = bolsa.get_group(item)
        X = dados[['Open', 'High', 'Low', 'Close', 'Volume']]
        y = pd.DataFrame({
            'Alta_real':
            dados['High'].shift(-1).fillna(method='pad'),
            'Baixa_real':
            dados['Low'].shift(-1).fillna(method='pad')
        })
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.20,
                                                            shuffle=False,
                                                            random_state=0)
        if (model == 'adr'):
            modelo = "Automatic Relevance Determination Regression"
            #regr_multi = MultiOutputRegressor(svm.SVR())
            regr_multi = MultiOutputRegressor(
                linear_model.ARDRegression(compute_score=True))
        elif (model == 'ada'):
            modelo = "Ada Regressor"
            regr_multi = MultiOutputRegressor(
                AdaBoostRegressor(random_state=0, n_estimators=100))
        elif (model == 'GB'):
            modelo = "GradientBoostingRegressor"
            regr_multi = MultiOutputRegressor(
                GradientBoostingRegressor(random_state=1, n_estimators=10))
        else:
            modelo = "LinerRegression com Bayesian Ridge"
            regr_multi = MultiOutputRegressor(linear_model.BayesianRidge())
        regr_multi = regr_multi.fit(X_train, y_train)
        y_pred = regr_multi.predict(X_test)
        #print(item)
        #print(": ")
        #print(r2_score(y_test, y_pred))
        #print(item,": ", r2_score(y_test, y_pred))
        r = r2_score(y_test, y_pred)
        resultado.append([item, r])
    resultado_geral = pd.DataFrame(resultado).to_html()
    context = {'modelo': modelo, 'resultado': resultado_geral}
    return render(request, 'app/multi.html', context)
Example 12
def SVM(xtr, ytr, xts, yts):
    start = time()
    SVR_RBF = MultiOutputRegressor(
        SVR(verbose=0, kernel='rbf', C=23.5, epsilon=0.01, gamma=0.1))
    SVR_RBF.fit(xtr, ytr)

    tmp = time() - start
    prd = SVR_RBF.predict(xts)
    return mean_euclidean_error(prd, yts), tmp
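mean_euclidean_error is a project helper that is not shown on this page; a plausible definition consistent with how it is called here:

import numpy as np

def mean_euclidean_error(pred, true):
    # Mean over samples of the Euclidean distance between predicted and true rows.
    return np.mean(np.linalg.norm(np.asarray(pred) - np.asarray(true), axis=1))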
Example 13
def runBaseLineRegression(model_params,data,estimator):

	#regr = MultiOutputRegressor(sklearn.linear_model.LinearRegression())
	regr = MultiOutputRegressor(estimator)
	#regr = MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
	#regr = MultiOutputRegressor(sklearn.linear_model.Lasso())

	#data
	AP_train,TRP_train = data[0]
	AP_dev,TRP_dev = data[1]

	if model_params["DirectionForward"]:
		X_train,Y_train,X_dev,Y_dev = TRP_train,AP_train,TRP_dev,AP_dev
	else:
		X_train,Y_train,X_dev,Y_dev = AP_train,TRP_train,AP_dev,TRP_dev
		model_params["OutputNames"],model_params["InputNames"] = model_params["InputNames"],model_params["OutputNames"]

	regr.fit(X_train,Y_train)
	Y_dev_pred = regr.predict(X_dev)
	Y_train_pred = regr.predict(X_train)

	if model_params["DirectionForward"]:
		#train
		mse_totoal_train = customUtils.mse_p(ix = (3,6),Y_pred = Y_train_pred,Y_true = Y_train)
		#dev
		mse_totoal_dev = customUtils.mse_p(ix = (3,6),Y_pred = Y_dev_pred,Y_true = Y_dev)

	else:
		mse_totoal_train = mse(Y_train,Y_train_pred,multioutput = 'raw_values')
		mse_totoal_dev = mse(Y_dev,Y_dev_pred,multioutput = 'raw_values')

	
	model_location = os.path.join('models',model_params["model_name"] +  '.json')


	with open(os.path.join('model_params',model_params["model_name"] +  '.json'), 'w') as fp:
		json.dump(model_params, fp, sort_keys=True)

	_ = run_eval_base(model_location,dataset = "train",email = model_params["email"])
	_ = run_eval_base(model_location,dataset = "test",email = model_params["email"])
	mse_total = run_eval_base(model_location,dataset = "dev",email = model_params["email"])

	
	return (mse_total_train.tolist(),mse_total_dev.tolist(),mse_total_train.sum(),mse_total_dev.sum())
Example 14
def make_bayesian_pred(df, next_week, debug=0):
    """
    This method creates predictions using bayesian regression.
    """
    space = {
        'estimator__alpha_1': [1e-10, 1e-5, 1],
        'estimator__alpha_2': [1e-10, 1e-5, 1],
        'estimator__lambda_1': [1e-10, 1e-5, 1],
        'estimator__lambda_2': [1e-10, 1e-5, 1],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False]
    }
    params = {
        'estimator__alpha_1': [1e-10, 1e-5, 1, 5],
        'estimator__alpha_2': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_1': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_2': [1e-10, 1e-5, 1, 5],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__fit_intercept': [True, False]
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_bay = MultiOutputRegressor(BayesianRidge())
    #multi_bay.set_params(**params)
    #best_random = grid_search(multi_bay, space, next_week, 3, X_train, Y_train)
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_bay,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
Example 15
class RandomForestSurrogate(Surrogate[ModelConfig], DatasetFeaturesMixin):
    """
    The Random Forest surrogate uses a random forest model to predict model
    performances.
    """
    def __init__(
        self,
        tracker: ModelTracker,
        use_simple_dataset_features: bool = False,
        use_seasonal_naive_performance: bool = False,
        use_catch22_features: bool = False,
        predict: Optional[List[str]] = None,
        output_normalization: OutputNormalization = None,
        impute_simulatable: bool = False,
    ):
        """
        Args:
            tracker: A tracker that can be used to impute latency and number of model parameters
                into model performances. Also, it is required for some input features.
            use_simple_dataset_features: Whether to use dataset features to predict using a
                weighted average.
            use_seasonal_naive_performance: Whether to use the Seasonal Naïve nCRPS as dataset
                features. Requires the cacher to be set.
            use_catch22_features: Whether to use catch22 features for datasets statistics. Ignored
                if `use_dataset_features` is not set.
            predict: The metrics to predict. All if not provided.
            output_normalization: The type of normalization to apply to the features of each
                dataset independently. `None` applies no normalization, "quantile" applies quantile
                normalization, and "standard" transforms data to have zero mean and unit variance.
            impute_simulatable: Whether the tracker should impute latency and number of model
                parameters into the returned performance object.
        """
        super().__init__(tracker, predict, output_normalization,
                         impute_simulatable)

        self.config_transformer = ConfigTransformer(
            add_model_features=True,
            add_dataset_statistics=use_simple_dataset_features,
            add_seasonal_naive_performance=use_seasonal_naive_performance,
            add_catch22_features=use_catch22_features,
            tracker=tracker,
        )

        base_estimator = RandomForestRegressor(n_jobs=1)
        self.estimator = MultiOutputRegressor(base_estimator)

    def _fit(self, X: List[Config[ModelConfig]],
             y: npt.NDArray[np.float32]) -> None:
        X_numpy = self.config_transformer.fit_transform(X)
        self.estimator.fit(X_numpy, y)

    def _predict(self,
                 X: List[Config[ModelConfig]]) -> npt.NDArray[np.float32]:
        X_numpy = self.config_transformer.transform(X)
        return self.estimator.predict(X_numpy)
Example 16
class DTRmodel:
    def __init__(self, fl, max_depth=8, num_est=300):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
        self.labels_scaler = fl.labels_scaler
        self.model = MultiOutputRegressor(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, save_mode=False, plot_name=None):
        training_features = fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
        else:
            training_labels = fl.labels

        self.model.fit(training_features, training_labels)

        return self.model

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_dim == 1:
            y_pred = self.model.predict(features)[:, None]
        else:
            y_pred = self.model.predict(features)
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, y_pred)
            mse = mean_squared_error(
                eval_fl.labels, self.labels_scaler.inverse_transform(y_pred))
        else:
            mse_norm = -1
            mse = mean_squared_error(eval_fl.labels, y_pred)

        return y_pred, mse, mse_norm
Example 17
def test_multi_regressor():
    # get some noised linear data
    X = np.random.random((1000, 10))
    a = np.random.random((10, 3))
    y = np.dot(X, a) + np.random.normal(0, 1e-3, (1000, 3))
    # fitting
    multioutputregressor = MultiOutputRegressor(xgboost.XGBRegressor(learning_rate=0.01), n_jobs=4).fit(X, y)
    # lgb.LGBMRegressor

    # predicting
    print(np.mean((multioutputregressor.predict(X) - y)**2, axis=0))  # 0.004, 0.003, 0.005
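The commented lgb.LGBMRegressor line hints that the same wrapper works with LightGBM; a minimal sketch under that assumption (requires the lightgbm package):

import lightgbm as lgb
multioutputregressor_lgbm = MultiOutputRegressor(lgb.LGBMRegressor(learning_rate=0.01), n_jobs=4).fit(X, y)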
Example 18
def svm_twostage(X_train,
                 X_test,
                 Y_train,
                 Y_test,
                 num_bus,
                 kernel='poly',
                 degree=2,
                 epsilon=0.01):
    y_train, y_test = Y_train[:, :(2 * num_bus)], Y_test[:, :(2 * num_bus)]
    svr = MultiOutputRegressor(
        SVR(kernel=kernel, degree=degree, C=1,
            epsilon=epsilon)).fit(X_train, y_train)
    train_pred = svr.predict(X_train)
    train_rms = np.sqrt(mean_squared_error(y_train, train_pred))
    train_pe_rms = Penalty(Y_train[:, (2 * num_bus):], train_pred, num_bus)
    test_pred = svr.predict(X_test)
    test_rms = np.sqrt(mean_squared_error(y_test, test_pred))
    test_pe_rms = Penalty(Y_test[:, (2 * num_bus):], test_pred, num_bus)
    result = np.array([train_rms, train_pe_rms, test_rms, test_pe_rms])
    return result, train_pred, test_pred
Example 19
def gbr():
    dataHandler = DataHandler('./inputs/training_data.txt', 5, 60)

    # Train the model - note that we need to wrap the single-output GradientBoostingRegressor
    # with the MultiOutputRegressor class to fit multi-output data
    x, y = dataHandler.get_training_data()
    boost_regressor = MultiOutputRegressor(
        GradientBoostingRegressor(learning_rate=0.1,
                                  n_estimators=100,
                                  verbose=0))
    boost_regressor.fit(x, y)

    # Evaluate the model
    x_val, y_val = dataHandler.get_validation_data()
    x_pred = boost_regressor.predict(x_val)
    median_MSE, mean_MSE, max_MSE, min_MSE = score_prediction(x_pred, y_val)
    L2 = np.sqrt(np.sum((x_pred - y_val)**2, 1))
    mean_L2 = np.mean(L2)
    median_L2 = np.median(L2)
    return (mean_L2, median_L2, mean_MSE, median_MSE)
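score_prediction is a project helper not shown on this page; a plausible implementation consistent with the four values unpacked above (summary statistics of per-sample MSE):

import numpy as np

def score_prediction(y_pred, y_true):
    # Mean squared error per sample, then median/mean/max/min over samples.
    per_sample_mse = np.mean((np.asarray(y_pred) - np.asarray(y_true)) ** 2, axis=1)
    return (np.median(per_sample_mse), np.mean(per_sample_mse),
            np.max(per_sample_mse), np.min(per_sample_mse))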
Example 20
def firstGBDT(wfTrain, posTrain, wfTest, posTest):
    clf = MultiOutputRegressor(
        ensemble.GradientBoostingRegressor(n_estimators=250, max_depth=20))
    startTime = time.time()
    clf.fit(wfTrain, posTrain)
    endTime = time.time()
    print(
        "The Gradient Boosting Regression spend %.3f seconds to fit the model"
        % (endTime - startTime))
    posPred = clf.predict(wfTest)
    acc = accuracy(posPred, posTest)
    print("Accuracy is: %.3f" % (acc))
Example 21
    def ForestRegressor(self, name):
        sciForest = MultiOutputRegressor(
            RandomForestRegressor(n_estimators=33)
            )

        sciForest.fit(self.X_train, self.Y_train[:,:2])

        predict_test = sciForest.predict(self.X_test)

        MSE = mean_squared_error(predict_test, self.Y_test[:,:2])

        print(MSE)
Example 22
def compare_process(X,y,res):
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=50)
    # min_max_scaler = preprocessing.StandardScaler()#StandardScaler
    # X_train = min_max_scaler.fit_transform(X_train)
    # X_test = min_max_scaler.fit_transform(X_test)
    for c in clfs:
        mt = MultiOutputRegressor(clfs[c])
        mt.fit(X_train,y_train)
        y_pred = mt.predict(X_test)
        # res.append(mean_absolute_error(y_test, y_pred))
        res[c].append(np.average(np.apply_along_axis(np.linalg.norm,1,y_test-y_pred)))
    print(res)
Example 23
def stratCV(model, nfolds, train_X, train_Y, **params):
    mskf = MultilabelStratifiedKFold(n_splits=nfolds, shuffle=True)

    for train_index, valid_index in mskf.split(train_X, train_Y):
        print("TRAIN:", train_index, "VALID:", valid_index)
        X_train, X_valid = train_X[train_index], train_X[valid_index]
        Y_train, Y_valid = train_Y[train_index], train_Y[valid_index]

        # Fit and score the model on each fold
        m = MultiOutputRegressor(model(**params))
        m.fit(X_train, Y_train)
        y_preds = m.predict(X_valid)
        y_score = log_loss(Y_valid, y_preds)
        print(y_score)
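MultilabelStratifiedKFold is not part of scikit-learn; it comes from the iterative-stratification package:

# pip install iterative-stratification
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold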
Example 24
    def MLPRegressor(self, name):
        sciMLP = MultiOutputRegressor(
            MLPRegressor(hidden_layer_sizes=(66,), 
            activation='logistic', solver='adam', max_iter=200, batch_size=50)
            )

        sciMLP.fit(self.X_train, self.Y_train[:,:2])

        predict_test = sciMLP.predict(self.X_test)

        MSE = mean_squared_error(predict_test, self.Y_test[:,:2])

        print(MSE)
Example 25
class DTRmodel:
    def __init__(self, fl, max_depth=8, num_est=300, chain=False):
        """
        Initialises new DTR model
        :param fl: fl class
        :param max_depth: max depth of each tree
        :param num_est: Number of estimators in the ensemble of trees
        :param chain: regressor chain (True) or independent multi-output (False)
        """
        self.labels_dim = fl.labels_dim
        self.labels_scaler = fl.labels_scaler
        if chain:
            self.model = RegressorChain(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        else:
            self.model = MultiOutputRegressor(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, *args, **kwargs):
        # *args, **kwargs is there for compatibility with the KModel class
        training_features = fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
        else:
            training_labels = fl.labels
        self.model.fit(training_features, training_labels)
        return self

    def predict(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_dim == 1:  # If labels is 1D output, the prediction will be a 1D array not 2D
            y_pred = self.model.predict(features)[:, None]
        else:
            y_pred = self.model.predict(features)
        return y_pred  # If labels is normalized, the prediction here is also normalized!
Example 26
def _scikit_model(model, X_train, y_train, X_test, multiregressor=False):
    print("Fitting: " + model)

    clf = models[model]

    if multiregressor:
        regr = MultiOutputRegressor(clf)
    else:
        regr = clf

    regr.fit(X_train, y_train)
    predictions = regr.predict(X_test)

    return predictions
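The module-level `models` registry assumed above is not shown; a minimal sketch of the shape this code expects (illustrative names only):

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

models = {
    "linear": LinearRegression(),
    "random_forest": RandomForestRegressor(),
}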
Example 27
    def test_fit_as_multi_output_regressor_if_target_to_feature_none(
            self, estimator, X_y):
        X, y = X_y
        multi_feature_multi_output_regressor = MultiFeatureMultiOutputRegressor(
            estimator)
        multi_feature_multi_output_regressor.fit(X, y)

        multi_output_regressor = MultiOutputRegressor(estimator)
        multi_output_regressor.fit(X, y)

        assert_almost_equal(
            multi_feature_multi_output_regressor.predict(X),
            multi_output_regressor.predict(X),
        )
Example 28
class SvmModel(object):
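    # Note: despite the class name, the wrapped base estimator is a
    # GradientBoostingRegressor, not an SVM.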

    def __init__(self):
        # Build Model
        self.model = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
        
    #Train model
    def trainModel(self,trainX,trainY):        
        self.model = self.model.fit(trainX, trainY)
   
    #Predict 
    def predict(self,testX):        
        result = self.model.predict(testX)
        return result
Example 29
def MOR_model(x_train, y_train, x_test):

    '''
    @The function applies a multi-output Gradient Boosting regressor to the data
    @Input x_train: the first 18 hours of measurements, y_train: the following 8 hours of measurements in the training data, x_test: the test-set inputs
    @Output is the predicted measurements of the test set
    '''

    MOR_time_start = time.perf_counter()
    MOR = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    MOR = MOR.fit(x_train, y_train)
    MOR_time_elapsed = (time.perf_counter() - MOR_time_start)
    exe_time.append(MOR_time_elapsed)
    return MOR.predict(x_test)
Example 30
def plot_learning_curve(C, gamma, epsilon):
    kfold = KFold(n_splits=splits_kfold, random_state=None, shuffle=True)
    for train_index, test_index in kfold.split(X):
        x_tr = X[train_index]
        y_tr = Y[train_index]
        x_ts = X[test_index]
        y_ts = Y[test_index]
        all_loss = []
        all_loss_tr = []
        n_examples = []
        for step in range(2, 102, 2):
            ind_x = int(step * (len(x_tr) / 100))
            ind_y = int(step * (len(y_tr) / 100))
            this_x_tr = x_tr[0:ind_x, :]
            this_y_tr = y_tr[0:ind_y, :]
            svr = SVR(C=C, gamma=gamma, epsilon=epsilon, verbose=False)
            mor = MultiOutputRegressor(svr)
            mor.fit(this_x_tr, this_y_tr)
            y_pred_tr = mor.predict(this_x_tr)
            y_pred = mor.predict(x_ts)
            this_loss = loss_fn(y_pred, y_ts)
            this_loss_tr = loss_fn(y_pred_tr, this_y_tr)
            n_examples.append(int(step * (len(x_tr) / 100)))
            all_loss.append(this_loss)
            all_loss_tr.append(this_loss_tr)
        plt.plot(n_examples, all_loss_tr)
        plt.plot(n_examples, all_loss, '--')
        plt.title("Learning Curve SVM C=" + str(C) + " gamma=" + str(gamma) +
                  " epsilon=" + str(epsilon))
        plt.xlabel("Number of training examples")
        plt.ylabel("Loss (Mean Euclidian Distance)")
        plt.legend(["Loss on training set", "Loss on validation set"])
        plt.savefig('./svm_learning_curve_' + str(C) + '_' + str(gamma) + '_' +
                    str(epsilon) + '.png',
                    dpi=500)
        plt.close()
        return
Example 31
class GPR:
    def __init__(self):
        pass

    def set_data(self, features, targets, D, denom_sq):
        self.features = features
        self.targets = targets
        self.D = D
        self.inv_denom_sq = denom_sq**-1

    def train(self, config):
        input_size = self.features['train'].shape[1]

        alpha = 1e-9  # 1e-5
        # IMPORTANT: if no kernel is specified, a constant one will be used per default.
        # The constant kernels hyperparameters will NOT be optimized!
        #kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
        kernel = 0.01 * RBF(length_scale=[0.1]*input_size, length_scale_bounds=(1e-2, 1e+2)) \
                + WhiteKernel(noise_level=alpha, noise_level_bounds=(1e-10, 1e0))
        regressor = GaussianProcessRegressor(kernel=kernel,
                                             normalize_y=False,
                                             n_restarts_optimizer=10)
        self.model = MultiOutputRegressor(regressor)

        self.model.fit(self.features['train'], self.targets['train'])
        # Print learnt hyperparameters
        #for e in self.model.estimators_:
        #    print(e.kernel_.get_params())

    def evaluate(self, features):
        return self.model.predict(features)

    def test(self):
        f = self.features['test']
        t = self.targets['test']
        q_rb = self.evaluate(f)
        eps_reg_sq = np.sum(
            (self.D * (q_rb - t))**2) * self.inv_denom_sq / f.shape[0]
        return eps_reg_sq**0.5

    def save(self, model_dir, component):
        path = os.path.join(model_dir, 'GPR', component, 'model')
        with open(path, 'wb+') as f:
            pickle.dump(self.model, f)

    def load(self, model_dir, component):
        path = os.path.join(model_dir, 'GPR', component, 'model')
        with open(path, 'rb') as f:
            self.model = pickle.load(f)
Example 32
def cross_validate(allData, material, regressor, time_window, metric_fun):
    ts_market_data_by_molecule, ts_sales_data, ts_market_data, stock, market_percentage = allData.get_dataframes_for_material(
        str(material))
    df_market = fg.transform_time_series(
        ts_market_data_by_molecule[ts_market_data_by_molecule.columns[0]])
    df_internal = fg.transform_time_series(
        ts_sales_data[ts_sales_data.columns[0]])
    df_external = fg.transform_time_series(
        ts_market_data[ts_market_data.columns[0]])
    df_external = df_external.append(fg.add_latest_month(df_external))
    df_market = df_market.append(fg.add_latest_month(df_market))
    df_joined = df_internal.add_suffix('_int').join(
        df_external.drop(columns=['quarter', 'month', 'year', 't']).
        add_suffix('_ext')).join(
            df_market.drop(
                columns=['quarter', 'month', 'year', 't']).add_suffix('_comp'))
    X_test = df_joined[-time_window:]
    y_test = X_test[['t_int', 't-1_ext', 't-1_comp']]
    X_test = X_test.drop(columns=['t_int', 't-1_ext', 't-1_comp'])
    X_test = X_test.head(n=1)
    X_train = df_joined[:-time_window]
    y_train = X_train[['t_int', 't-1_ext', 't-1_comp']]
    X_train = X_train.drop(columns=['t_int', 't-1_ext', 't-1_comp'])
    last_X_int = df_internal[:-time_window].tail(n=1)
    last_X_ext = df_external[:-time_window].tail(n=1)
    last_X_comp = df_market[:-time_window].tail(n=1)
    reg = MultiOutputRegressor(regressor)
    reg.fit(X_train, y_train)
    date_rng = y_test.index
    Y_pred = []
    for i in range(0, time_window):
        X_to_predict = fg.get_x_to_predict_all_data(last_X_int, last_X_ext,
                                                    last_X_comp)
        X_to_predict = X_to_predict[X_test.columns]
        Y_pred_to_add = reg.predict(X_to_predict)
        last_X_int = fg.add_latest_month(last_X_int)
        last_X_int['t'] = float(Y_pred_to_add[0][0])
        last_X_ext = fg.add_latest_month(last_X_ext)
        last_X_ext['t-1'] = float(Y_pred_to_add[0][1])
        last_X_comp = fg.add_latest_month(last_X_comp)
        last_X_comp['t-1'] = float(Y_pred_to_add[0][2])
        Y_pred.append(Y_pred_to_add[0][0])

    Y_pred = pd.Series(Y_pred).astype(float)
    Y_pred.index = date_rng
    return metric_fun(y_test['t_int'], Y_pred)
Example 33
def Regression(X,Y,groups,grade):
    lpgo = GroupKFold(n_splits=14)
    MAE = []
    ECM = []
    MAPE = []
    R2_SCORE = []
    N = np.size(Y[0])
    
    # The polynomial degree terms are added to the features
    poly = PolynomialFeatures(degree=grade)
    X = poly.fit_transform(X)
    
    for train_index,test_index in lpgo.split(X, Y, groups):
      X_train, X_test = X[train_index], X[test_index]
      y_train, y_test = Y[train_index], Y[test_index]
    
      # Normalize the data
      sc_X = StandardScaler()
      X_train = sc_X.fit_transform(X_train)
      X_test = sc_X.transform(X_test)
    
      # Fit the simple linear regression model
      regressor = LinearRegression()
      multiple_output_regressor = MultiOutputRegressor(regressor)
    
      multiple_output_regressor.fit(X_train, y_train)
    
      # Predict the test results
      y_pred = multiple_output_regressor.predict(X_test)
    
      #print("R2-1",multiple_output_regressor.score(X_test,y_test[:,0]))
      #print("R2-2",multiple_output_regressor.score(X_test,y_test[:,1]))
      ECM.append(mean_squared_error(y_test,y_pred,multioutput='raw_values'))
      MAE.append(mean_absolute_error(y_test,y_pred,multioutput='raw_values'))
      R2_SCORE.append(r2_score(y_test, y_pred, multioutput='raw_values'))
      m = []
      m.append(np.mean(np.abs((y_test[:,0] - y_pred[:,0]) / y_test[:,0])) * 100)
      m.append(np.mean(np.abs((y_test[:,1] - y_pred[:,1]) / y_test[:,1])) * 100)
      MAPE.append(m)
    
      ECM_matrix = np.asmatrix(ECM)
      MAE_matrix = np.asmatrix(MAE)
      MAPE_matrix = np.asmatrix(MAPE)
      R2_matrix = np.asmatrix(R2_SCORE)
    for i in range(0,N):
      print("El error cuadratrico medio de validación para la salida", (i+1),"es (ECM):", np.mean(ECM_matrix[:,i]),"+-",np.std(ECM_matrix[:,i]))
      print("El error medio absoluto de validación para la salida", (i+1),"es (MAE):", np.mean(MAE_matrix[:,i]),"+-",np.std(MAE_matrix[:,i]))
      print("El porcentaje de error medio absoluto de validación para la salida", (i+1),"es (MAPE):", np.mean(MAPE_matrix[:,i]),"%" ,"+-",np.std(MAPE_matrix[:,i]))
      print("Coeficiente de determinación para la salida", (i+1),"es (R2):", np.around(np.mean(R2_matrix[:,i])),"%","+-",np.around(np.std(R2_matrix[:,i]),decimals=5))
Example 34
class QLearningGBM(Model):
    def __init__(self, newEstimatorsPerLearn):
        super(QLearningGBM, self).__init__()
        self.newEstimatorsPerLearn = newEstimatorsPerLearn
        self.GBM = MultiOutputRegressor(GradientBoostingRegressor(
            warm_start=True,
            verbose=True,
            n_estimators=newEstimatorsPerLearn,
            learning_rate=0.01),
                                        n_jobs=-1)

    def predict(self, X):
        try:
            return self.GBM.predict(
                X)  # Vector with estimated points for all actions
        except NotFittedError as e:
            return np.random.rand(15)

    def learn(self, X, ACTION, Y, learnScale=False):
        Y_LEARN = self.getYOnlyForActionTaken(X, ACTION, Y)
        self.GBM.estimator.n_estimators += self.newEstimatorsPerLearn
        print "TOTAL TREES", self.GBM.estimator.n_estimators
        self.GBM.fit(X, Y_LEARN)

    def getYOnlyForActionTaken(self, X, ACTION, Y):
        predictionRows = list()
        for i in range(X.shape[0]):
            try:
                allActionPredictions = self.GBM.predict(X[i, :].reshape(
                    1, -1))[0]  # Current predictions
            except NotFittedError as e:
                allActionPredictions = np.random.rand(15)
            allActionPredictions[ACTION[i]] = Y[
                i]  # Only change the prediction for the action that was taken to the expected Y value
            predictionRows += [allActionPredictions]
        return np.array(predictionRows)
Example 35

# Setup as in the scikit-learn multi-output random forest example (see Example 6).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
y += (0.5 - rng.rand(*y.shape))

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=400,
                                                    random_state=4)

max_depth = 30
regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
                                                          random_state=0))
regr_multirf.fit(X_train, y_train)

regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Plot the results
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k',
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1], edgecolor='k',
            c="c", s=s, marker="^", alpha=a,
            label="RF score=%.2f" % regr_rf.score(X_test, y_test))
plt.xlim([-6, 6])