def test_multi_target_sparse_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix,
                   sp.dok_matrix, sp.lil_matrix]:
        rgr = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))

        rgr.fit(X_train, y_train)
        rgr_sparse.fit(sparse(X_train), y_train)

        assert_almost_equal(rgr.predict(X_test),
                            rgr_sparse.predict(sparse(X_test)))
def test_multi_target_sample_weight_partial_fit():
    # weighted regressor
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr_w.partial_fit(X, y, w)

    # weighted with different weights
    w = [2., 2.]
    rgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    rgr.partial_fit(X, y, w)

    assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0])
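# A hedged usage sketch (not from the original tests): MultiOutputRegressor
# exposes partial_fit whenever the wrapped estimator does, so the model can be
# updated batch by batch on streaming data. The data shapes, coefficient
# matrix, and batch count below are illustrative assumptions.
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
coef = rng.rand(3, 2)  # fixed linear map from 3 features to 2 targets
model = MultiOutputRegressor(SGDRegressor(random_state=0))
for _ in range(10):  # ten incremental mini-batches
    X_batch = rng.rand(32, 3)
    y_batch = X_batch @ coef
    model.partial_fit(X_batch, y_batch)
print(model.predict([[0.5, 0.5, 0.5]]))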
def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))
def train_knn_regressor_model(n_neighbors, weights, algorithm,
                              training_examples, training_targets, model_dir):
    neigh_reg = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights,
                                    algorithm=algorithm)
    regressor = MultiOutputRegressor(neigh_reg)
    regressor.fit(training_examples, training_targets)

    training_predictions = regressor.predict(training_examples)
    r2_score = regressor.score(training_examples, training_targets)
    print("R^2 score (on training data): %0.3f" % r2_score)
    rmse = math.sqrt(
        metrics.mean_squared_error(training_predictions, training_targets))
    print("Final RMSE (on training data): %0.3f" % rmse)

    with open(model_dir + "/results.txt", "w") as res_file:
        res_file.write("### Results of the regression ###\n")
        res_file.write("R^2 score (on training data): %0.3f\n" % r2_score)
        res_file.write("Final RMSE (on training data): %0.3f\n" % rmse)

    joblib.dump(regressor, model_dir + "/knn_reg.joblib")
    return regressor, r2_score
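# A hedged usage sketch for train_knn_regressor_model above: the synthetic
# two-target data and the "./out" directory are illustrative assumptions,
# not from the source.
import os
import numpy as np

os.makedirs("./out", exist_ok=True)
X_demo = np.random.rand(100, 4)
Y_demo = np.hstack([X_demo.sum(axis=1, keepdims=True),
                    X_demo.prod(axis=1, keepdims=True)])
reg, r2 = train_knn_regressor_model(n_neighbors=5, weights="distance",
                                    algorithm="auto",
                                    training_examples=X_demo,
                                    training_targets=Y_demo,
                                    model_dir="./out")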
def train_svm_regressor_model(kernel, gamma, coeff, degree, epsilon,
                              training_examples, training_targets, model_dir):
    svr = svm.SVR(kernel=kernel, gamma=gamma, C=coeff, degree=degree,
                  epsilon=epsilon)
    regressor = MultiOutputRegressor(svr)
    regressor.fit(training_examples, training_targets)

    training_predictions = regressor.predict(training_examples)
    r2_score = regressor.score(training_examples, training_targets)
    print("R^2 score (on training data): %0.3f" % r2_score)
    rmse = math.sqrt(
        metrics.mean_squared_error(training_predictions, training_targets))
    print("Final RMSE (on training data): %0.3f" % rmse)

    with open(model_dir + "/results.txt", "w") as res_file:
        res_file.write("### Results of the regression ###\n")
        res_file.write("R^2 score (on training data): %0.3f\n" % r2_score)
        res_file.write("Final RMSE (on training data): %0.3f\n" % rmse)

    joblib.dump(regressor, model_dir + "/svm_reg.joblib")
    return regressor, r2_score
def test_multioutput(self):
    # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor

    # Create a random dataset
    rng = np.random.RandomState(1)
    X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
    y += (0.5 - rng.rand(*y.shape))

    df = pdml.ModelFrame(X, target=y)

    max_depth = 30
    rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                            random_state=self.random_state)
    reg1 = df.multioutput.MultiOutputRegressor(rf1)

    rf2 = RandomForestRegressor(max_depth=max_depth,
                                random_state=self.random_state)
    reg2 = MultiOutputRegressor(rf2)

    df.fit(reg1)
    reg2.fit(X, y)

    result = df.predict(reg2)
    expected = pd.DataFrame(reg2.predict(X))
    tm.assert_frame_equal(result, expected)
class Solver:
    def __init__(self, func, scopes):
        self.func = func
        self.scopes = np.array(scopes)
        self.model = None

    def train(self, epochs=1e3, verbose=False):
        self.model = MultiOutputRegressor(
            MLPRegressor(solver='lbfgs', alpha=1e-5,
                         hidden_layer_sizes=(100, 30), random_state=1))
        n_variables = len(self.scopes)
        xmin = self.scopes[:, 0]
        xmax = self.scopes[:, 1]
        Xs = list()
        Ys = list()
        if verbose:
            print("Generating training data...", end="")
        for i in range(int(epochs)):
            x = xmin + (xmax - xmin) * np.random.random(n_variables)
            Xs.append(self.func(x))
            Ys.append(x)
            if (i + 1) % int(epochs / 10) == 0 and verbose:
                print(" {value:0.0f}% ".format(
                    value=(i + 1) / int(epochs) * 100), end="")
        if verbose:
            print("Complete!")
        # Xs = np.array(Xs)
        # Ys = np.array(Ys)
        if verbose:
            print("Training model...", end='')
        self.model.fit(Xs, Ys)
        if verbose:
            print("End with R^2: {value:0.4f}".format(
                value=self.model.score(Xs, Ys)))

    def evaluate(self, bs):
        return self.model.predict(bs)

    def evaluate_single(self, b):
        return self.model.predict([b])[0]
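# A hedged usage sketch for Solver: it learns the *inverse* of func by
# sampling x in the given scopes, feeding func(x) as features and x as
# targets. The forward function, scopes, and epoch count are illustrative
# assumptions.
import numpy as np

forward = lambda x: np.array([x[0] + x[1], x[0] * x[1]])
solver = Solver(forward, scopes=[(0.0, 1.0), (0.0, 1.0)])
solver.train(epochs=500)
# Recover an approximate preimage of forward([0.3, 0.7])
print(solver.evaluate_single(forward(np.array([0.3, 0.7]))))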
def multi_output_regression(train, test, grid, outputs):
    # NOTE: `pd` here is presumably a project-local data module (it provides
    # training_testing_data), not pandas.

    # Multi-Layer Perceptron Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % len(input_train))
    print('You are testing on %d samples' % len(input_test))
    multi_output_mlp = MultiOutputRegressor(
        MLPRegressor(solver='adam', learning_rate='adaptive', max_iter=500,
                     early_stopping=True))
    multi_output_mlp.fit(input_train, output_train)
    prediction_mlp = multi_output_mlp.predict(input_test)
    print('Multi-Layer Perceptron')
    print(r'$R^{2}$: %.5f' % r2_score(actual, prediction_mlp))
    print('MSE: %.5f' % mean_squared_error(actual, prediction_mlp))
    print('RMSE: %.5f' % np.sqrt(mean_squared_error(actual, prediction_mlp)))

    # Gradient Boosting Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % len(input_train))
    print('You are testing on %d samples' % len(input_test))
    multi_output_gbr = MultiOutputRegressor(
        GradientBoostingRegressor(loss='huber'))
    multi_output_gbr.fit(input_train, output_train)
    prediction_gbr = multi_output_gbr.predict(input_test)
    print('Gradient Boosting Regressor')
    print(r'$R^{2}$: %.5f' % r2_score(actual, prediction_gbr))
    print('MSE: %.5f' % mean_squared_error(actual, prediction_gbr))
    print('RMSE: %.5f' % np.sqrt(mean_squared_error(actual, prediction_gbr)))

    # Random Forest Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % len(input_train))
    print('You are testing on %d samples' % len(input_test))
    multi_output_rfr = MultiOutputRegressor(RandomForestRegressor())
    multi_output_rfr.fit(input_train, output_train)
    prediction_rfr = multi_output_rfr.predict(input_test)
    print('Random Forest Regressor')
    print(r'$R^{2}$: %.5f' % r2_score(actual, prediction_rfr))
    print('MSE: %.5f' % mean_squared_error(actual, prediction_rfr))
    print('RMSE: %.5f' % np.sqrt(mean_squared_error(actual, prediction_rfr)))

    return actual, prediction_gbr, prediction_mlp, prediction_rfr
def multir(request, model):
    lista = [
        'B3SA3', 'BBDC4', 'BRAP4', 'BRFS3', 'BRKM5', 'BRML3', 'BTOW3',
        'CCRO3', 'CIEL3', 'CMIG4', 'CSAN3', 'CSNA3', 'CYRE3', 'ECOR3',
        'EGIE3', 'ELET3', 'ELET6', 'EMBR3', 'ENBR3', 'EQTL3', 'ESTC3',
        'FLRY3', 'GGBR4', 'GOAU4', 'GOLL4', 'HYPE3', 'IGTA3', 'KROT3',
        'ITSA4', 'ITUB4', 'LAME4', 'LREN3', 'MGLU3', 'MRFG3', 'MRVE3',
        'MULT3', 'NATU3', 'PCAR4', 'PETR3', 'PETR4', 'QUAL3', 'RADL3',
        'RENT3', 'SANB11', 'SBSP3', 'TAEE11', 'TIMP3', 'UGPA3', 'USIM5',
        'VALE3', 'VIVT4', 'WEGE3'
    ]
    resultado = []
    for item in lista:
        bolsa = pd.read_csv("app/data/bolsa.csv",
                            index_col='Date').groupby('Codigo')
        dados = bolsa.get_group(item)
        X = dados[['Open', 'High', 'Low', 'Close', 'Volume']]
        y = pd.DataFrame({
            'Alta_real': dados['High'].shift(-1).fillna(method='pad'),
            'Baixa_real': dados['Low'].shift(-1).fillna(method='pad')
        })
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.20, shuffle=False, random_state=0)

        if model == 'adr':
            modelo = "Automatic Relevance Determination Regression"
            # regr_multi = MultiOutputRegressor(svm.SVR())
            regr_multi = MultiOutputRegressor(
                linear_model.ARDRegression(compute_score=True))
        elif model == 'ada':
            modelo = "AdaBoost Regressor"
            regr_multi = MultiOutputRegressor(
                AdaBoostRegressor(random_state=0, n_estimators=100))
        elif model == 'GB':
            modelo = "GradientBoostingRegressor"
            regr_multi = MultiOutputRegressor(
                GradientBoostingRegressor(random_state=1, n_estimators=10))
        else:
            modelo = "LinearRegression with BayesianRidge"
            regr_multi = MultiOutputRegressor(linear_model.BayesianRidge())

        regr_multi = regr_multi.fit(X_train, y_train)
        y_pred = regr_multi.predict(X_test)
        # print(item, ": ", r2_score(y_test, y_pred))
        r = r2_score(y_test, y_pred)
        resultado.append([item, r])

    resultado_geral = pd.DataFrame(resultado).to_html()
    context = {'modelo': modelo, 'resultado': resultado_geral}
    return render(request, 'app/multi.html', context)
def SVM(xtr, ytr, xts, yts):
    start = time()
    SVR_RBF = MultiOutputRegressor(
        SVR(verbose=0, kernel='rbf', C=23.5, epsilon=0.01, gamma=0.1))
    SVR_RBF.fit(xtr, ytr)
    tmp = time() - start
    prd = SVR_RBF.predict(xts)
    return mean_euclidean_error(prd, yts), tmp
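# The SVM helper above relies on mean_euclidean_error, which is defined
# elsewhere in its source; a plausible reconstruction is sketched below
# (mean L2 distance between predicted and true target rows) and should be
# treated as an assumption, not the original definition.
import numpy as np

def mean_euclidean_error(pred, true):
    return np.mean(np.linalg.norm(np.asarray(pred) - np.asarray(true), axis=1))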
def runBaseLineRegression(model_params, data, estimator):
    regr = MultiOutputRegressor(estimator)
    # Alternative baselines:
    # regr = MultiOutputRegressor(sklearn.linear_model.LinearRegression())
    # regr = MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
    # regr = MultiOutputRegressor(sklearn.linear_model.Lasso())

    # data
    AP_train, TRP_train = data[0]
    AP_dev, TRP_dev = data[1]

    if model_params["DirectionForward"]:
        X_train, Y_train, X_dev, Y_dev = TRP_train, AP_train, TRP_dev, AP_dev
    else:
        X_train, Y_train, X_dev, Y_dev = AP_train, TRP_train, AP_dev, TRP_dev
        model_params["OutputNames"], model_params["InputNames"] = \
            model_params["InputNames"], model_params["OutputNames"]

    regr.fit(X_train, Y_train)
    Y_dev_pred = regr.predict(X_dev)
    Y_train_pred = regr.predict(X_train)

    if model_params["DirectionForward"]:
        # train
        mse_total_train = customUtils.mse_p(ix=(3, 6), Y_pred=Y_train_pred,
                                            Y_true=Y_train)
        # dev
        mse_total_dev = customUtils.mse_p(ix=(3, 6), Y_pred=Y_dev_pred,
                                          Y_true=Y_dev)
    else:
        mse_total_train = mse(Y_train, Y_train_pred, multioutput='raw_values')
        mse_total_dev = mse(Y_dev, Y_dev_pred, multioutput='raw_values')

    model_location = os.path.join('models',
                                  model_params["model_name"] + '.json')
    with open(os.path.join('model_params',
                           model_params["model_name"] + '.json'), 'w') as fp:
        json.dump(model_params, fp, sort_keys=True)

    _ = run_eval_base(model_location, dataset="train",
                      email=model_params["email"])
    _ = run_eval_base(model_location, dataset="test",
                      email=model_params["email"])
    mse_total = run_eval_base(model_location, dataset="dev",
                              email=model_params["email"])

    return (mse_total_train.tolist(), mse_total_dev.tolist(),
            mse_total_train.sum(), mse_total_dev.sum())
def make_bayesian_pred(df, next_week, debug=0):
    """
    This method creates predictions using Bayesian regression.
    """
    space = {
        'estimator__alpha_1': [1e-10, 1e-5, 1],
        'estimator__alpha_2': [1e-10, 1e-5, 1],
        'estimator__lambda_1': [1e-10, 1e-5, 1],
        'estimator__lambda_2': [1e-10, 1e-5, 1],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False]
    }
    params = {
        'estimator__alpha_1': [1e-10, 1e-5, 1, 5],
        'estimator__alpha_2': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_1': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_2': [1e-10, 1e-5, 1, 5],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__fit_intercept': [True, False]
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_bay = MultiOutputRegressor(BayesianRidge())
    # multi_bay.set_params(**params)
    # best_random = grid_search(multi_bay, space, next_week, 3, X_train, Y_train)
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print("CV: ",
              ms.cross_val_score(multi_bay, Y_train, y_pred_untrain, cv=10,
                                 scoring='neg_mean_squared_error'))

    return next_week
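# A hedged sketch of the grid search that make_bayesian_pred leaves commented
# out: with MultiOutputRegressor, hyperparameters of the wrapped BayesianRidge
# are addressed through the "estimator__" prefix. The small grid below is
# illustrative, not the source's search space.
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import BayesianRidge

search = GridSearchCV(
    MultiOutputRegressor(BayesianRidge()),
    param_grid={'estimator__alpha_1': [1e-10, 1e-5, 1],
                'estimator__lambda_1': [1e-10, 1e-5, 1]},
    cv=3, scoring='neg_mean_squared_error')
# search.fit(X_train, Y_train); search.best_params_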
class RandomForestSurrogate(Surrogate[ModelConfig], DatasetFeaturesMixin):
    """
    The Random Forest surrogate uses a random forest model to predict model
    performances.
    """

    def __init__(
        self,
        tracker: ModelTracker,
        use_simple_dataset_features: bool = False,
        use_seasonal_naive_performance: bool = False,
        use_catch22_features: bool = False,
        predict: Optional[List[str]] = None,
        output_normalization: OutputNormalization = None,
        impute_simulatable: bool = False,
    ):
        """
        Args:
            tracker: A tracker that can be used to impute latency and number
                of model parameters into model performances. It is also
                required for some input features.
            use_simple_dataset_features: Whether to use dataset features to
                predict using a weighted average.
            use_seasonal_naive_performance: Whether to use the Seasonal Naïve
                nCRPS as dataset features. Requires the cacher to be set.
            use_catch22_features: Whether to use catch22 features for dataset
                statistics. Ignored if `use_dataset_features` is not set.
            predict: The metrics to predict. All if not provided.
            output_normalization: The type of normalization to apply to the
                features of each dataset independently. `None` applies no
                normalization, "quantile" applies quantile normalization, and
                "standard" transforms data to have zero mean and unit
                variance.
            impute_simulatable: Whether the tracker should impute latency and
                number of model parameters into the returned performance
                object.
        """
        super().__init__(tracker, predict, output_normalization,
                         impute_simulatable)
        self.config_transformer = ConfigTransformer(
            add_model_features=True,
            add_dataset_statistics=use_simple_dataset_features,
            add_seasonal_naive_performance=use_seasonal_naive_performance,
            add_catch22_features=use_catch22_features,
            tracker=tracker,
        )
        base_estimator = RandomForestRegressor(n_jobs=1)
        self.estimator = MultiOutputRegressor(base_estimator)

    def _fit(self, X: List[Config[ModelConfig]],
             y: npt.NDArray[np.float32]) -> None:
        X_numpy = self.config_transformer.fit_transform(X)
        self.estimator.fit(X_numpy, y)

    def _predict(self,
                 X: List[Config[ModelConfig]]) -> npt.NDArray[np.float32]:
        X_numpy = self.config_transformer.transform(X)
        return self.estimator.predict(X_numpy)
class DTRmodel:
    def __init__(self, fl, max_depth=8, num_est=300):
        """
        Initialises a new decision-tree regression model (AdaBoost over
        decision trees, one regressor per output).

        :param fl: fl class containing the training data information
        :param max_depth: Maximum depth of each decision tree
        :param num_est: Number of estimators in the AdaBoost ensemble
        """
        self.labels_dim = fl.labels_dim  # Each task has a 1-D output
        self.labels_scaler = fl.labels_scaler
        self.model = MultiOutputRegressor(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, save_mode=False, plot_name=None):
        training_features = fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
        else:
            training_labels = fl.labels
        self.model.fit(training_features, training_labels)
        return self.model

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_dim == 1:
            y_pred = self.model.predict(features)[:, None]
        else:
            y_pred = self.model.predict(features)
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, y_pred)
            mse = mean_squared_error(
                eval_fl.labels, self.labels_scaler.inverse_transform(y_pred))
        else:
            mse_norm = -1
            mse = mean_squared_error(eval_fl.labels, y_pred)
        return y_pred, mse, mse_norm
def test_multi_regressor():
    # get some noised linear data
    X = np.random.random((1000, 10))
    a = np.random.random((10, 3))
    y = np.dot(X, a) + np.random.normal(0, 1e-3, (1000, 3))

    # fit one XGBRegressor per target (lgb.LGBMRegressor works the same way)
    multioutputregressor = MultiOutputRegressor(
        xgboost.XGBRegressor(learning_rate=0.01), n_jobs=4).fit(X, y)

    # per-target training MSE, roughly 0.004, 0.003, 0.005
    print(np.mean((multioutputregressor.predict(X) - y) ** 2, axis=0))
def svm_twostage(X_train, X_test, Y_train, Y_test, num_bus, kernel='poly',
                 degree=2, epsilon=0.01):
    y_train, y_test = Y_train[:, :(2 * num_bus)], Y_test[:, :(2 * num_bus)]
    svr = MultiOutputRegressor(
        SVR(kernel=kernel, degree=degree, C=1,
            epsilon=epsilon)).fit(X_train, y_train)

    train_pred = svr.predict(X_train)
    train_rms = np.sqrt(mean_squared_error(y_train, train_pred))
    train_pe_rms = Penalty(Y_train[:, (2 * num_bus):], train_pred, num_bus)

    test_pred = svr.predict(X_test)
    test_rms = np.sqrt(mean_squared_error(y_test, test_pred))
    test_pe_rms = Penalty(Y_test[:, (2 * num_bus):], test_pred, num_bus)

    result = np.array([train_rms, train_pe_rms, test_rms, test_pe_rms])
    return result, train_pred, test_pred
def gbr():
    dataHandler = DataHandler('./inputs/training_data.txt', 5, 60)

    # Train the model - note that we need to wrap the single-output
    # GradientBoostingRegressor with the MultiOutputRegressor class to fit
    # multiple output data
    x, y = dataHandler.get_training_data()
    boost_regressor = MultiOutputRegressor(
        GradientBoostingRegressor(learning_rate=0.1, n_estimators=100,
                                  verbose=0))
    boost_regressor.fit(x, y)

    # Evaluate the model
    x_val, y_val = dataHandler.get_validation_data()
    median_MSE, mean_MSE, max_MSE, min_MSE = score_prediction(
        boost_regressor.predict(x_val), y_val)
    x_pred = boost_regressor.predict(x_val)
    L2 = np.sqrt(np.sum((x_pred - y_val) ** 2, 1))
    mean_L2 = np.mean(L2)
    median_L2 = np.median(L2)
    return (mean_L2, median_L2, mean_MSE, median_MSE)
def firstGBDT(wfTrain, posTrain, wfTest, posTest):
    clf = MultiOutputRegressor(
        ensemble.GradientBoostingRegressor(n_estimators=250, max_depth=20))

    startTime = time.time()
    clf.fit(wfTrain, posTrain)
    endTime = time.time()
    print("The Gradient Boosting Regression spent %.3f seconds fitting the model"
          % (endTime - startTime))

    posPred = clf.predict(wfTest)
    acc = accuracy(posPred, posTest)
    print("Accuracy is: %.3f" % acc)
def ForestRegressor(self, name):
    sciForest = MultiOutputRegressor(RandomForestRegressor(n_estimators=33))
    sciForest.fit(self.X_train, self.Y_train[:, :2])
    predict_test = sciForest.predict(self.X_test)
    MSE = mean_squared_error(predict_test, self.Y_test[:, :2])
    print(MSE)
def compare_process(X, y, res):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=50)
    # min_max_scaler = preprocessing.StandardScaler()  # StandardScaler
    # X_train = min_max_scaler.fit_transform(X_train)
    # X_test = min_max_scaler.fit_transform(X_test)
    for c in clfs:
        mt = MultiOutputRegressor(clfs[c])
        mt.fit(X_train, y_train)
        y_pred = mt.predict(X_test)
        # res.append(mean_absolute_error(y_test, y_pred))
        res[c].append(np.average(
            np.apply_along_axis(np.linalg.norm, 1, y_test - y_pred)))
    print(res)
def stratCV(model, nfolds, train_X, train_Y, **params):
    mskf = MultilabelStratifiedKFold(n_splits=nfolds, shuffle=True)
    for train_index, valid_index in mskf.split(train_X, train_Y):
        print("TRAIN:", train_index, "VALID:", valid_index)
        X_train, X_valid = train_X[train_index], train_X[valid_index]
        Y_train, Y_valid = train_Y[train_index], train_Y[valid_index]
        m = MultiOutputRegressor(model(**params))
        m.fit(X_train, Y_train)
        y_preds = m.predict(X_valid)
        y_score = log_loss(Y_valid, y_preds)
        print(y_score)
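# Note: MultilabelStratifiedKFold used by stratCV is not part of scikit-learn;
# it comes from the iterative-stratification package. A minimal
# import-and-split sketch, with illustrative toy data:
#   pip install iterative-stratification
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import numpy as np

X_demo = np.random.rand(20, 4)
Y_demo = (np.random.rand(20, 3) > 0.5).astype(int)
mskf = MultilabelStratifiedKFold(n_splits=2, shuffle=True, random_state=0)
for tr, va in mskf.split(X_demo, Y_demo):
    print(len(tr), len(va))  # label distribution is balanced across folds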
def MLPRegressor(self, name):
    sciMLP = MultiOutputRegressor(
        MLPRegressor(hidden_layer_sizes=(66,), activation='logistic',
                     solver='adam', max_iter=200, batch_size=50))
    sciMLP.fit(self.X_train, self.Y_train[:, :2])
    predict_test = sciMLP.predict(self.X_test)
    MSE = mean_squared_error(predict_test, self.Y_test[:, :2])
    print(MSE)
class DTRmodel:
    def __init__(self, fl, max_depth=8, num_est=300, chain=False):
        """
        Initialises new DTR model
        :param fl: fl class
        :param max_depth: max depth of each tree
        :param num_est: Number of estimators in the ensemble of trees
        :param chain: regressor chain (True) or independent multi-output (False)
        """
        self.labels_dim = fl.labels_dim
        self.labels_scaler = fl.labels_scaler
        if chain:
            self.model = RegressorChain(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        else:
            self.model = MultiOutputRegressor(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, *args, **kwargs):
        # *args, **kwargs are there for compatibility with the KModel class
        training_features = fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
        else:
            training_labels = fl.labels
        self.model.fit(training_features, training_labels)
        return self

    def predict(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_dim == 1:
            # If labels is a 1D output, the prediction is a 1D array, not 2D
            y_pred = self.model.predict(features)[:, None]
        else:
            y_pred = self.model.predict(features)
        # If labels are normalized, the prediction here is also normalized!
        return y_pred
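# A hedged usage sketch for DTRmodel: a types.SimpleNamespace stands in for
# the fl data-loader object the class assumes; the attribute names mirror the
# ones the class reads, and the data itself is illustrative.
from types import SimpleNamespace
import numpy as np

X_demo = np.random.rand(50, 4)
Y_demo = np.random.rand(50, 2)
fl_demo = SimpleNamespace(labels_dim=2, labels_scaler=None,
                          normalise_labels=False,
                          features_c_norm=X_demo, labels=Y_demo)
model = DTRmodel(fl_demo, max_depth=4, num_est=50)
model.train_model(fl_demo)
print(model.predict(SimpleNamespace(features_c_norm=X_demo[:3])))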
def _scikit_model(model, X_train, y_train, X_test, multiregressor=False):
    print("Fitting: " + model)
    clf = models[model]
    if multiregressor:
        regr = MultiOutputRegressor(clf)
    else:
        regr = clf
    regr.fit(X_train, y_train)
    predictions = regr.predict(X_test)
    return predictions
def test_fit_as_multi_output_regressor_if_target_to_feature_none(
        self, estimator, X_y):
    X, y = X_y
    multi_feature_multi_output_regressor = MultiFeatureMultiOutputRegressor(
        estimator)
    multi_feature_multi_output_regressor.fit(X, y)

    multi_output_regressor = MultiOutputRegressor(estimator)
    multi_output_regressor.fit(X, y)

    assert_almost_equal(
        multi_feature_multi_output_regressor.predict(X),
        multi_output_regressor.predict(X),
    )
class SvmModel(object):
    def __init__(self):
        # Build model (note: despite the class name, this wraps a
        # GradientBoostingRegressor, not an SVM)
        self.model = MultiOutputRegressor(
            GradientBoostingRegressor(random_state=0))

    # Train model
    def trainModel(self, trainX, trainY):
        self.model = self.model.fit(trainX, trainY)

    # Predict
    def predict(self, testX):
        result = self.model.predict(testX)
        return result
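# A hedged usage sketch for SvmModel; the toy two-target data below is
# illustrative, not from the source.
import numpy as np

Xd = np.random.rand(80, 3)
Yd = np.hstack([Xd[:, :1] * 2.0, Xd[:, 1:2] - Xd[:, 2:3]])
m = SvmModel()
m.trainModel(Xd, Yd)
print(m.predict(Xd[:2]))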
def MOR_model(x_train, y_train, x_test):
    '''
    The function applies a multi-output Gradient Boosting regressor to the
    training and testing data.
    Input: x_train, x_test: the first 18 hours of measurements;
           y_train: the following 8 hours of measurements in the training data
    Output: the predicted measurements of the test set
    '''
    MOR_time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
    MOR = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    MOR = MOR.fit(x_train, y_train)
    MOR_time_elapsed = time.perf_counter() - MOR_time_start
    exe_time.append(MOR_time_elapsed)
    return MOR.predict(x_test)
def plot_learning_curve(C, gamma, epsilon):
    kfold = KFold(n_splits=splits_kfold, random_state=None, shuffle=True)
    for train_index, test_index in kfold.split(X):
        x_tr = X[train_index]
        y_tr = Y[train_index]
        x_ts = X[test_index]
        y_ts = Y[test_index]

        all_loss = []
        all_loss_tr = []
        n_examples = []
        for step in range(2, 102, 2):
            ind_x = int(step * (len(x_tr) / 100))
            ind_y = int(step * (len(y_tr) / 100))
            this_x_tr = x_tr[0:ind_x, :]
            this_y_tr = y_tr[0:ind_y, :]

            svr = SVR(C=C, gamma=gamma, epsilon=epsilon, verbose=False)
            mor = MultiOutputRegressor(svr)
            mor.fit(this_x_tr, this_y_tr)

            y_pred_tr = mor.predict(this_x_tr)
            y_pred = mor.predict(x_ts)
            this_loss = loss_fn(y_pred, y_ts)
            this_loss_tr = loss_fn(y_pred_tr, this_y_tr)

            n_examples.append(int(step * (len(x_tr) / 100)))
            all_loss.append(this_loss)
            all_loss_tr.append(this_loss_tr)

        plt.plot(n_examples, all_loss_tr)
        plt.plot(n_examples, all_loss, '--')
        plt.title("Learning Curve SVM C=" + str(C) + " gamma=" + str(gamma)
                  + " epsilon=" + str(epsilon))
        plt.xlabel("Number of training examples")
        plt.ylabel("Loss (Mean Euclidean Distance)")
        plt.legend(["Loss on training set", "Loss on validation set"])
        plt.savefig('./svm_learning_curve_' + str(C) + '_' + str(gamma) + '_'
                    + str(epsilon) + '.png', dpi=500)
        plt.close()
    return
class GPR:
    def __init__(self):
        pass

    def set_data(self, features, targets, D, denom_sq):
        self.features = features
        self.targets = targets
        self.D = D
        self.inv_denom_sq = denom_sq**-1

    def train(self, config):
        input_size = self.features['train'].shape[1]
        alpha = 1e-9  # 1e-5

        # IMPORTANT: if no kernel is specified, a constant one is used by
        # default, and the constant kernel's hyperparameters will NOT be
        # optimized!
        # kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
        kernel = 0.01 * RBF(length_scale=[0.1] * input_size,
                            length_scale_bounds=(1e-2, 1e+2)) \
            + WhiteKernel(noise_level=alpha, noise_level_bounds=(1e-10, 1e0))

        regressor = GaussianProcessRegressor(kernel=kernel, normalize_y=False,
                                             n_restarts_optimizer=10)
        self.model = MultiOutputRegressor(regressor)
        self.model.fit(self.features['train'], self.targets['train'])

        # Print learnt hyperparameters
        # for e in self.model.estimators_:
        #     print(e.kernel_.get_params())

    def evaluate(self, features):
        return self.model.predict(features)

    def test(self):
        f = self.features['test']
        t = self.targets['test']
        q_rb = self.evaluate(f)
        eps_reg_sq = np.sum(
            (self.D * (q_rb - t))**2) * self.inv_denom_sq / f.shape[0]
        return eps_reg_sq**0.5

    def save(self, model_dir, component):
        path = os.path.join(model_dir, 'GPR', component, 'model')
        with open(path, 'wb+') as f:
            pickle.dump(self.model, f)

    def load(self, model_dir, component):
        path = os.path.join(model_dir, 'GPR', component, 'model')
        with open(path, 'rb') as f:
            self.model = pickle.load(f)
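# A hedged usage sketch for the GPR class: the features/targets dicts, the
# weighting vector D, and denom_sq follow the attribute names set_data
# expects; the values themselves are illustrative assumptions.
import numpy as np

rng = np.random.RandomState(0)
X_tr, X_te = rng.rand(40, 3), rng.rand(10, 3)
Y_tr, Y_te = np.sin(X_tr[:, :2]), np.sin(X_te[:, :2])

gpr = GPR()
gpr.set_data(features={'train': X_tr, 'test': X_te},
             targets={'train': Y_tr, 'test': Y_te},
             D=np.ones(2), denom_sq=1.0)
gpr.train(config=None)  # config is unused by train()
print(gpr.test())       # weighted relative regression error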
def cross_validate(allData, material, regressor, time_window, metric_fun):
    (ts_market_data_by_molecule, ts_sales_data, ts_market_data, stock,
     market_percentage) = allData.get_dataframes_for_material(str(material))

    df_market = fg.transform_time_series(
        ts_market_data_by_molecule[ts_market_data_by_molecule.columns[0]])
    df_internal = fg.transform_time_series(
        ts_sales_data[ts_sales_data.columns[0]])
    df_external = fg.transform_time_series(
        ts_market_data[ts_market_data.columns[0]])
    df_external = df_external.append(fg.add_latest_month(df_external))
    df_market = df_market.append(fg.add_latest_month(df_market))

    df_joined = df_internal.add_suffix('_int').join(
        df_external.drop(columns=['quarter', 'month', 'year', 't'])
        .add_suffix('_ext')).join(
        df_market.drop(columns=['quarter', 'month', 'year', 't'])
        .add_suffix('_comp'))

    X_test = df_joined[-time_window:]
    y_test = X_test[['t_int', 't-1_ext', 't-1_comp']]
    X_test = X_test.drop(columns=['t_int', 't-1_ext', 't-1_comp'])
    X_test = X_test.head(n=1)

    X_train = df_joined[:-time_window]
    y_train = X_train[['t_int', 't-1_ext', 't-1_comp']]
    X_train = X_train.drop(columns=['t_int', 't-1_ext', 't-1_comp'])

    last_X_int = df_internal[:-time_window].tail(n=1)
    last_X_ext = df_external[:-time_window].tail(n=1)
    last_X_comp = df_market[:-time_window].tail(n=1)

    reg = MultiOutputRegressor(regressor)
    reg.fit(X_train, y_train)

    date_rng = y_test.index
    Y_pred = []
    for i in range(0, time_window):
        X_to_predict = fg.get_x_to_predict_all_data(last_X_int, last_X_ext,
                                                    last_X_comp)
        X_to_predict = X_to_predict[X_test.columns]
        Y_pred_to_add = reg.predict(X_to_predict)

        # Feed the three predictions back in as the next month's lags
        last_X_int = fg.add_latest_month(last_X_int)
        last_X_int['t'] = float(Y_pred_to_add[0][0])
        last_X_ext = fg.add_latest_month(last_X_ext)
        last_X_ext['t-1'] = float(Y_pred_to_add[0][1])
        last_X_comp = fg.add_latest_month(last_X_comp)
        last_X_comp['t-1'] = float(Y_pred_to_add[0][2])

        Y_pred.append(Y_pred_to_add[0][0])

    Y_pred = pd.Series(Y_pred).astype(float)
    Y_pred.index = date_rng
    return metric_fun(X_test, Y_pred['t_int'])
def Regression(X, Y, groups, grade):
    lpgo = GroupKFold(n_splits=14)
    MAE = []
    ECM = []
    MAPE = []
    R2_SCORE = []
    N = np.size(Y[0])

    # Add the polynomial degrees to the features
    poly = PolynomialFeatures(degree=grade)
    X = poly.fit_transform(X)

    for train_index, test_index in lpgo.split(X, Y, groups):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Normalize the data
        sc_X = StandardScaler()
        X_train = sc_X.fit_transform(X_train)
        X_test = sc_X.transform(X_test)

        # Fit the simple regression model
        regressor = LinearRegression()
        multiple_output_regressor = MultiOutputRegressor(regressor)
        multiple_output_regressor.fit(X_train, y_train)

        # Predict the test results
        y_pred = multiple_output_regressor.predict(X_test)
        # print("R2-1", multiple_output_regressor.score(X_test, y_test[:, 0]))
        # print("R2-2", multiple_output_regressor.score(X_test, y_test[:, 1]))

        ECM.append(mean_squared_error(y_test, y_pred,
                                      multioutput='raw_values'))
        MAE.append(mean_absolute_error(y_test, y_pred,
                                       multioutput='raw_values'))
        R2_SCORE.append(r2_score(y_test, y_pred, multioutput='raw_values'))
        m = []
        m.append(np.mean(np.abs((y_test[:, 0] - y_pred[:, 0])
                                / y_test[:, 0])) * 100)
        m.append(np.mean(np.abs((y_test[:, 1] - y_pred[:, 1])
                                / y_test[:, 1])) * 100)
        MAPE.append(m)

    ECM_matrix = np.asmatrix(ECM)
    MAE_matrix = np.asmatrix(MAE)
    MAPE_matrix = np.asmatrix(MAPE)
    R2_matrix = np.asmatrix(R2_SCORE)

    for i in range(0, N):
        print("Validation mean squared error for output", (i + 1), "(ECM):",
              np.mean(ECM_matrix[:, i]), "+-", np.std(ECM_matrix[:, i]))
        print("Validation mean absolute error for output", (i + 1), "(MAE):",
              np.mean(MAE_matrix[:, i]), "+-", np.std(MAE_matrix[:, i]))
        print("Validation mean absolute percentage error for output", (i + 1),
              "(MAPE):", np.mean(MAPE_matrix[:, i]), "%", "+-",
              np.std(MAPE_matrix[:, i]))
        print("Coefficient of determination for output", (i + 1), "(R2):",
              np.around(np.mean(R2_matrix[:, i])), "%", "+-",
              np.around(np.std(R2_matrix[:, i]), decimals=5))
class QLearningGBM(Model):
    def __init__(self, newEstimatorsPerLearn):
        super(QLearningGBM, self).__init__()
        self.newEstimatorsPerLearn = newEstimatorsPerLearn
        self.GBM = MultiOutputRegressor(
            GradientBoostingRegressor(warm_start=True, verbose=True,
                                      n_estimators=newEstimatorsPerLearn,
                                      learning_rate=0.01),
            n_jobs=-1)

    def predict(self, X):
        try:
            # Vector with estimated points for all actions
            return self.GBM.predict(X)
        except NotFittedError:
            return np.random.rand(15)

    def learn(self, X, ACTION, Y, learnScale=False):
        Y_LEARN = self.getYOnlyForActionTaken(X, ACTION, Y)
        self.GBM.estimator.n_estimators += self.newEstimatorsPerLearn
        print("TOTAL TREES", self.GBM.estimator.n_estimators)
        self.GBM.fit(X, Y_LEARN)

    def getYOnlyForActionTaken(self, X, ACTION, Y):
        predictionRows = list()
        for i in range(X.shape[0]):
            try:
                # Current predictions
                allActionPredictions = self.GBM.predict(
                    X[i, :].reshape(1, -1))[0]
            except NotFittedError:
                allActionPredictions = np.random.rand(15)
            # Only change the prediction for the action that was taken to the
            # expected Y value
            allActionPredictions[ACTION[i]] = Y[i]
            predictionRows += [allActionPredictions]
        return np.array(predictionRows)
# This script follows the scikit-learn multi-output random forest example;
# the imports and data-generation lines below are restored from the same
# example (compare test_multioutput above) so the snippet runs standalone.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
y += (0.5 - rng.rand(*y.shape))

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=400,
                                                    random_state=4)

max_depth = 30
regr_multirf = MultiOutputRegressor(
    RandomForestRegressor(max_depth=max_depth, random_state=0))
regr_multirf.fit(X_train, y_train)

regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Plot the results
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k', c="navy", s=s,
            marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1], edgecolor='k', c="c", s=s, marker="^",
            alpha=a, label="RF score=%.2f" % regr_rf.score(X_test, y_test))
plt.xlim([-6, 6])
# Closing lines restored from the same scikit-learn example
plt.ylim([-6, 6])
plt.xlabel("target 1")
plt.ylabel("target 2")
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
plt.show()