def test_multioutput(self):
    """ModelFrame predictions must match a plain sklearn MultiOutputRegressor.

    Based on the scikit-learn multi-output random-forest example:
    http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html
    """
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor

    # Noisy sin/cos two-column regression target.
    rng = np.random.RandomState(1)
    X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
    y += 0.5 - rng.rand(*y.shape)

    df = pdml.ModelFrame(X, target=y)
    max_depth = 30

    frame_forest = df.ensemble.RandomForestRegressor(
        max_depth=max_depth, random_state=self.random_state)
    reg1 = df.multioutput.MultiOutputRegressor(frame_forest)

    plain_forest = RandomForestRegressor(
        max_depth=max_depth, random_state=self.random_state)
    reg2 = MultiOutputRegressor(plain_forest)

    df.fit(reg1)
    reg2.fit(X, y)

    result = df.predict(reg2)
    expected = pd.DataFrame(reg2.predict(X))
    tm.assert_frame_equal(result, expected)
def test_multi_target_sample_weights_api():
    """Weight-aware fit must fail cleanly when the base estimator lacks support."""
    features = [[1, 2, 3], [4, 5, 6]]
    targets = [[3.141, 2.718], [2.718, 3.141]]
    weights = [0.8, 0.6]

    rgr = MultiOutputRegressor(Lasso())
    assert_raises_regex(ValueError, "does not support sample weights",
                        rgr.fit, features, targets, weights)

    # no exception should be raised if the base estimator supports weights
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(features, targets, weights)
def test_acquisition_per_second_gradient(acq_func):
    """Check acquisition-function gradients for a GP wrapped in a multi-output regressor."""
    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)
    # The second target grows cubically so that mean_grad and std_grad
    # stay away from zero.
    y = np.vstack((X[:, 0], np.abs(X[:, 0]) ** 3)).T

    for X_new in (rng.randn(10), rng.randn(10)):
        gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0)
        mor = MultiOutputRegressor(gpr)
        mor.fit(X, y)
        check_gradient_correctness(X_new, mor, acq_func, 1.5)
def test_multi_target_sparse_regression():
    """Dense and sparse inputs must give identical multi-target predictions."""
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    sparse_formats = [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix,
                      sp.dok_matrix, sp.lil_matrix]
    for to_sparse in sparse_formats:
        dense_rgr = MultiOutputRegressor(Lasso(random_state=0))
        sparse_rgr = MultiOutputRegressor(Lasso(random_state=0))
        dense_rgr.fit(X_train, y_train)
        sparse_rgr.fit(to_sparse(X_train), y_train)
        assert_almost_equal(dense_rgr.predict(X_test),
                            sparse_rgr.predict(to_sparse(X_test)))
def test_multi_target_sample_weights():
    """A weight-2 sample must behave exactly like a duplicated sample."""
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    weights = [2., 1.]
    weighted = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    weighted.fit(Xw, yw, weights)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    plain = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    plain.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(plain.predict(X_test), weighted.predict(X_test))
def test_multi_target_sample_weights():
    """Weighted fitting must equal unweighted fitting on duplicated rows."""
    # Fit with explicit sample weights.
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    sample_weights = [2.0, 1.0]
    rgr_weighted = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_weighted.fit(Xw, yw, sample_weights)

    # Equivalent data set: the first row appears twice instead of weight 2.
    X_rep = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y_rep = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr_plain = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_plain.fit(X_rep, y_rep)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr_plain.predict(X_test), rgr_weighted.predict(X_test))
class Solver:
    """Learns the inverse of ``func`` over ``scopes`` with an MLP regressor.

    Training samples random points x, evaluates func(x), and fits a model
    mapping func(x) back to x, i.e. the model approximates func's inverse.
    """

    def __init__(self, func, scopes):
        self.func = func
        self.scopes = np.array(scopes)
        self.model = None

    def train(self, epochs=1e3, verbose=False):
        """Generate int(epochs) random samples and fit the inverse model."""
        self.model = MultiOutputRegressor(
            MLPRegressor(solver='lbfgs', alpha=1e-5,
                         hidden_layer_sizes=(100, 30), random_state=1))
        n_variables = len(self.scopes)
        lower = self.scopes[:, 0]
        upper = self.scopes[:, 1]
        inputs, outputs = [], []
        if verbose:
            print("Generating training data...", end="")
        for i in range(int(epochs)):
            x = lower + (upper - lower) * np.random.random(n_variables)
            # Train on (func(x) -> x): the model inverts the target function.
            inputs.append(self.func(x))
            outputs.append(x)
            # Progress marker every 10% of the run.
            if (i + 1) % int(epochs / 10) == 0 and verbose:
                print(" {value:0.0f}% ".format(
                    value=(i + 1) / int(epochs) * 100), end="")
        if verbose:
            print("Complete!")
        if verbose:
            print("Training model...", end='')
        self.model.fit(inputs, outputs)
        if verbose:
            print("End with R^2: {value:0.4f}".format(
                value=self.model.score(inputs, outputs)))

    def evaluate(self, bs):
        """Predict inverse values for a batch of observations."""
        return self.model.predict(bs)

    def evaluate_single(self, b):
        """Predict the inverse value for a single observation."""
        return self.model.predict([b])[0]
def fit(self, X, y):
    """For each CV fold, keep the forest whose max_features minimises MAE.

    Appends the best model of each fold to self.models and returns self.
    """
    X, y = np.array(X), np.array(y)
    for fold_no, (train_idx, test_idx) in enumerate(self.folds.split(X)):
        X_train, y_train = X[train_idx], y[train_idx]
        X_test, y_test = X[test_idx], y[test_idx]
        lowest_error, best_model = float('inf'), None
        for num_features in self.FEATURES:
            candidate = MultiOutputRegressor(
                RandomForestRegressor(max_features=num_features,
                                      n_estimators=100, n_jobs=-1))
            candidate.fit(X_train, y_train)
            error = mean_absolute_error(y_test, candidate.predict(X_test))
            if error < lowest_error:
                lowest_error, best_model = error, candidate
        self.models.append(best_model)
    return self
def regression(train_x, train_label, text_x, text_label):
    """Fit a multi-output SVR and return the per-target RMSE values as a list.

    NOTE(review): ``text_x``/``text_label`` presumably mean the *test* split
    (parameter names kept for caller compatibility).
    """
    clf = MultiOutputRegressor(svm.SVR(gamma='scale'))
    clf.fit(train_x, train_label)
    y_pred = pd.DataFrame(clf.predict(text_x))
    n_targets = y_pred.shape[1]
    RMSE = np.sqrt(mean_squared_error(text_label, y_pred,
                                      multioutput='raw_values'))
    return [RMSE[i] for i in range(0, n_targets)]
def train_model(x, y, n):
    """Fit a multi-output RidgeCV model on the first ``n`` rows of x and y."""
    alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
    base_estimator = linear_model.RidgeCV(alphas=alphas)
    return MultiOutputRegressor(base_estimator).fit(x.iloc[:n, :], y.iloc[:n, :])
def test_multi_target_sparse_regression():
    """All scipy sparse formats must predict identically to dense input."""
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for make_sparse in (sp.csr_matrix, sp.csc_matrix, sp.coo_matrix,
                        sp.dok_matrix, sp.lil_matrix):
        rgr_dense = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))
        rgr_dense.fit(X_train, y_train)
        rgr_sparse.fit(make_sparse(X_train), y_train)
        assert_almost_equal(rgr_dense.predict(X_test),
                            rgr_sparse.predict(make_sparse(X_test)))
def fit(x_train, y_train, parameters_01, parameters_median, parameters_09):
    """Fit quantile LightGBM models for the 0.1, 0.5 and 0.9 quantiles.

    Returns (median_model, q0.1_model, q0.9_model); the median model is a
    bagged ensemble of multi-output quantile regressors.
    """
    def _quantile_regressor(alpha, params):
        # One multi-output LGBM quantile regressor for the given level.
        return MultiOutputRegressor(
            LGBMRegressor(objective='quantile', alpha=alpha, **params))

    regressor_median = BaggingRegressor(
        _quantile_regressor(0.5, parameters_median),
        n_jobs=-1, n_estimators=15)
    regressor_median.fit(x_train, y_train)

    regressor_0_1 = _quantile_regressor(0.1, parameters_01)
    regressor_0_1.fit(x_train, y_train)

    regressor_0_9 = _quantile_regressor(0.9, parameters_09)
    regressor_0_9.fit(x_train, y_train)

    return regressor_median, regressor_0_1, regressor_0_9
def train_right_eye_cyl_axis_model(config):
    """Train a linear-SVR multi-output model for right-eye cyl/axis.

    Returns (feature_column_names, fitted_regressor), or (None, None) when
    any step fails (the error is printed, not raised).
    """
    try:
        print("Model training started...")

        # Pull the raw CSV out of the storage bucket.
        bucket_file = get_training_data(config)
        dataset = pd.read_csv(io.BytesIO(bucket_file['Body'].read()))

        # Restrict to the configured right-eye cyl/axis columns.
        columns = config["data_set_columns"]["right_eye_cyl_axis"]
        right_eye_dataset = pd.DataFrame(dataset, columns=columns)

        # Drop duplicate rows when present.
        if right_eye_dataset.duplicated().any():
            right_eye_dataset = right_eye_dataset.drop_duplicates()

        # Encode the categorical satisfaction notes as 1/0.
        right_eye_dataset["notes"] = right_eye_dataset["notes"].map(
            {"happy": 1, "unhappy": 0})

        # Features: everything but the last three columns; targets: columns 7-8.
        X = right_eye_dataset.iloc[:, :-3]
        y = right_eye_dataset.iloc[:, 7:9]

        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42)

        from sklearn.svm import SVR
        from sklearn.multioutput import MultiOutputRegressor
        regressor = MultiOutputRegressor(SVR(kernel="linear"), n_jobs=-1)
        regressor.fit(X_train, y_train)

        print("Model training done.")
        return list(X.columns), regressor
    except Exception as e:
        print(str(e))
        return None, None
def objective(space):
    """Hyperopt objective: MSE of a multi-output XGB regressor on (Xt, yt)."""
    global X, Xt, y, yt
    booster = XGBRegressor(n_estimators=int(space['n_estimators']),
                           max_depth=int(space['max_depth']),
                           gamma=space['gamma'],
                           reg_alpha=space['reg_alpha'],
                           reg_lambda=space['reg_lambda'],
                           min_child_weight=space['min_child_weight'])
    clf = MultiOutputRegressor(booster)
    # NOTE(review): `verbose` is forwarded to each per-target fit; this needs
    # a scikit-learn version whose MultiOutputRegressor.fit accepts
    # **fit_params — confirm against the pinned sklearn version.
    clf.fit(X, y, verbose=False)
    accuracy = mean_squared_error(yt, clf.predict(Xt))
    print("SCORE:", accuracy)
    return {'loss': accuracy, 'status': STATUS_OK}
def test_sklearn_multioutput_regressor(self):
    """Round-trip several regressor types through hummingbird for 2-4 targets."""
    regressor_classes = [DecisionTreeRegressor, ExtraTreesRegressor,
                         RandomForestRegressor, LinearRegression]
    for n_targets in (2, 3, 4):
        for model_class in regressor_classes:
            seed = random.randint(0, 2**32 - 1)
            # LinearRegression takes no random_state argument.
            if model_class == LinearRegression:
                model = MultiOutputRegressor(model_class())
            else:
                model = MultiOutputRegressor(model_class(random_state=seed))
            X, y = datasets.make_regression(
                n_samples=50, n_features=10, n_informative=5,
                n_targets=n_targets, random_state=seed)
            X = X.astype("float32")
            y = y.astype("float32")
            model.fit(X, y)
            torch_model = hummingbird.ml.convert(
                model, "torch",
                extra_config={constants.TREE_OP_PRECISION_DTYPE: "float64"})
            self.assertTrue(torch_model is not None)
            np.testing.assert_allclose(
                model.predict(X), torch_model.predict(X),
                rtol=1e-5, atol=1e-4,
                err_msg="{}/{}/{}".format(n_targets, model_class, seed))
def multir(request, model):
    """Train one multi-output regressor per ticker and render the R^2 table.

    :param request: Django request object.
    :param model: base-estimator selector: 'adr' (ARD regression), 'ada'
        (AdaBoost), 'GB' (gradient boosting), anything else -> Bayesian Ridge.
    :returns: rendered 'app/multi.html' response with the per-ticker R^2 table.
    """
    lista = [
        'B3SA3', 'BBDC4', 'BRAP4', 'BRFS3', 'BRKM5', 'BRML3', 'BTOW3',
        'CCRO3', 'CIEL3', 'CMIG4', 'CSAN3', 'CSNA3', 'CYRE3', 'ECOR3',
        'EGIE3', 'ELET3', 'ELET6', 'EMBR3', 'ENBR3', 'EQTL3', 'ESTC3',
        'FLRY3', 'GGBR4', 'GOAU4', 'GOLL4', 'HYPE3', 'IGTA3', 'KROT3',
        'ITSA4', 'ITUB4', 'LAME4', 'LREN3', 'MGLU3', 'MRFG3', 'MRVE3',
        'MULT3', 'NATU3', 'PCAR4', 'PETR3', 'PETR4', 'QUAL3', 'RADL3',
        'RENT3', 'SANB11', 'SBSP3', 'TAEE11', 'TIMP3', 'UGPA3', 'USIM5',
        'VALE3', 'VIVT4', 'WEGE3'
    ]
    # Read the quotes once; the original re-read the CSV on every iteration.
    bolsa = pd.read_csv("app/data/bolsa.csv", index_col='Date').groupby('Codigo')
    resultado = []
    for item in lista:
        dados = bolsa.get_group(item)
        X = dados[['Open', 'High', 'Low', 'Close', 'Volume']]
        # Targets: next day's high/low; the last row is padded with its own value.
        y = pd.DataFrame({
            'Alta_real': dados['High'].shift(-1).fillna(method='pad'),
            'Baixa_real': dados['Low'].shift(-1).fillna(method='pad')
        })
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.20, shuffle=False, random_state=0)
        if (model == 'adr'):
            modelo = "Automatic Relevance Determination Regression"
            regr_multi = MultiOutputRegressor(
                linear_model.ARDRegression(compute_score=True))
        elif (model == 'ada'):
            modelo = "Ada Regressor"
            regr_multi = MultiOutputRegressor(
                AdaBoostRegressor(random_state=0, n_estimators=100))
        elif (model == 'GB'):
            modelo = "GradientBoostingRegressor"
            regr_multi = MultiOutputRegressor(
                GradientBoostingRegressor(random_state=1, n_estimators=10))
        else:
            modelo = "LinerRegression com Bayesian Ridge"
            regr_multi = MultiOutputRegressor(linear_model.BayesianRidge())
        regr_multi = regr_multi.fit(X_train, y_train)
        y_pred = regr_multi.predict(X_test)
        r = r2_score(y_test, y_pred)
        resultado.append([item, r])
    resultado_geral = pd.DataFrame(resultado).to_html()
    context = {'modelo': modelo, 'resultado': resultado_geral}
    # BUG FIX: the original ended with a bare `return`, leaving render()
    # stranded on the next line and the view returning None.
    return render(request, 'app/multi.html', context)
def generate_joint_model(single_model):
    """Wrap ``single_model`` in a MultiOutputRegressor, fit, score and persist it.

    Relies on module-level X_train/Y_train/X_test/Y_test and model_folder.
    Returns (fitted_model, saved_path).
    """
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)

    score_train = model.score(X_train, Y_train)
    print('Score of train', round(score_train * 100, 1), "%")
    score = model.score(X_test, Y_test)
    print('Score of test', round(score * 100, 1), "%")

    # File name encodes the test score and the base estimator's class name.
    estimator_name = str(model.get_params()['estimator']).split('(')[0]
    model_path = (model_folder + r"/"
                  + str(round(score, 3)).replace('.', '_')
                  + r"_" + estimator_name + '.joblib')
    joblib.dump(model, model_path)
    print("Save model file", model_path)
    return model, model_path
def train_diff_levels(noise, size):
    """Train a linear SVR on the dataset and pickle it under saved_models/svr/.

    :param noise: noise level used to name the checkpoint file.
    :param size: number of training examples, also used in the file name.

    NOTE(review): the image/label globs are hard-coded to noise_0_alt and do
    not vary with ``noise``/``size`` — confirm this is intentional.
    """
    # Load data with specified amount of noise and number of examples.
    data = Data(noise, size,
                imageFiles='./datasets/noise_0_alt/train_data/regular/*.png',
                labelFiles='./datasets/noise_0_alt/train_data/regular/*.npy')

    # Train the SVR; pixel values are scaled to [0, 1] first.
    svr = LinearSVR(tol=0.1, verbose=10)
    multi_svr = MultiOutputRegressor(svr, n_jobs=-1)
    multi_svr.fit(data.x / 255.0, data.y)

    # Save trained model. BUG FIX: the original passed an unclosed file handle
    # to pickle.dump; `with` guarantees the file is flushed and closed.
    out_path = "saved_models/svr/noise_{0}_training_{1}.ckpt".format(noise, size)
    with open(out_path, 'wb') as fh:
        pickle.dump(multi_svr, fh)
def multi_reg(data, out, saison):
    """Train week / weekend consumption models for the given season.

    Returns (clf_week, clf_week_end) fitted on the seasonal sub-data.
    """
    cols = [
        'temp_1', 'temp_2', 'mean_national_temp', 'humidity_1', 'humidity_2',
        'consumption_secondary_1', 'consumption_secondary_2',
        'consumption_secondary_3'
    ]
    output_col = ['consumption_1', 'consumption_2']
    X_week, X_week_end = sub_data(data, cols, saison)
    Y_week, Y_week_end = sub_data(out, output_col, saison)

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.svm import SVR

    def _forest():
        # Shared base configuration for both branches.
        return RandomForestRegressor(n_estimators=100, criterion='mae',
                                     random_state=0)

    # Summer uses a plain forest on weekdays and a multi-output wrapper on
    # weekends; other seasons swap the two.
    if saison == 'ete':
        clf_week = _forest()
        clf_week_end = MultiOutputRegressor(_forest())
    else:
        clf_week = MultiOutputRegressor(_forest())
        clf_week_end = _forest()
    clf_week.fit(X_week, Y_week)
    clf_week_end.fit(X_week_end, Y_week_end)

    print("training score {} : {}".format(
        saison, (clf_week.score(X_week, Y_week),
                 clf_week_end.score(X_week_end, Y_week_end))))
    return (clf_week, clf_week_end)
def runBaseLineRegression(model_params, data, estimator):
    """Fit a multi-output baseline regressor and report train/dev MSE.

    ``data`` is ((AP_train, TRP_train), (AP_dev, TRP_dev)); the direction flag
    decides which representation acts as input and which as output.
    Returns (train_mse_list, dev_mse_list, train_mse_sum, dev_mse_sum).
    """
    regr = MultiOutputRegressor(estimator)
    AP_train, TRP_train = data[0]
    AP_dev, TRP_dev = data[1]

    if model_params["DirectionForward"]:
        X_train, Y_train, X_dev, Y_dev = TRP_train, AP_train, TRP_dev, AP_dev
    else:
        X_train, Y_train, X_dev, Y_dev = AP_train, TRP_train, AP_dev, TRP_dev
        # NOTE(review): swapping the name lists only in the reverse direction
        # — the collapsed original made this indentation ambiguous; confirm.
        model_params["OutputNames"], model_params["InputNames"] = \
            model_params["InputNames"], model_params["OutputNames"]

    regr.fit(X_train, Y_train)
    Y_dev_pred = regr.predict(X_dev)
    Y_train_pred = regr.predict(X_train)

    if model_params["DirectionForward"]:
        mse_totoal_train = customUtils.mse_p(ix=(3, 6), Y_pred=Y_train_pred,
                                             Y_true=Y_train)
        mse_totoal_dev = customUtils.mse_p(ix=(3, 6), Y_pred=Y_dev_pred,
                                           Y_true=Y_dev)
    else:
        mse_totoal_train = mse(Y_train, Y_train_pred, multioutput='raw_values')
        mse_totoal_dev = mse(Y_dev, Y_dev_pred, multioutput='raw_values')

    model_location = os.path.join('models', model_params["model_name"] + '.json')
    with open(os.path.join('model_params',
                           model_params["model_name"] + '.json'), 'w') as fp:
        json.dump(model_params, fp, sort_keys=True)

    _ = run_eval_base(model_location, dataset="train", email=model_params["email"])
    _ = run_eval_base(model_location, dataset="test", email=model_params["email"])
    mse_total = run_eval_base(model_location, dataset="dev",
                              email=model_params["email"])
    return (mse_totoal_train.tolist(), mse_totoal_dev.tolist(),
            mse_totoal_train.sum(), mse_totoal_dev.sum())
def decision_function(self, X):
    """Fit per-sample sparse source estimates and return beta coefficients.

    Expects X to carry data columns plus trailing 'subject' metadata columns;
    returns an array with one row of per-parcel max |coefficient| per sample.
    """
    X = X.copy()
    # Rescale the data columns (all but the trailing metadata columns);
    # presumably a unit conversion for the measurements — confirm with caller.
    X.iloc[:, :-2] *= 1e12
    L, parcel_indices_L, subj_dict = self._get_lead_field_info()
    # use only Lead Fields of the subjects found in X
    subj_dict = dict((k, subj_dict[k]) for k in np.unique(X['subject']))
    self.lead_field, self.parcel_indices = [], []
    subj_dict_x = {}
    # Re-index the retained subjects densely (0..n-1) and collect their
    # lead fields / parcel indices in the same order.
    for idx, s_key in enumerate(subj_dict.keys()):
        subj_dict_x[s_key] = idx
        self.lead_field.append(L[subj_dict[s_key]])
        self.parcel_indices.append(parcel_indices_L[subj_dict[s_key]])
    X['subject_id'] = X['subject'].map(subj_dict_x)
    # NOTE(review): astype() returns a copy; this line has no lasting effect
    # on X — confirm whether the int cast was meant to be assigned back.
    X.astype({'subject_id': 'int32'}).dtypes
    model = MultiOutputRegressor(self.model, n_jobs=self.n_jobs)
    X = X.reset_index(drop=True)
    # One (initially empty) coefficient row per sample, filled per subject.
    betas = np.empty((len(X), 0)).tolist()
    for subj_idx in np.unique(X['subject_id']):
        l_used = self.lead_field[subj_idx]
        X_used = X[X['subject_id'] == subj_idx]
        X_used = X_used.iloc[:, :-2]
        # Column-normalise the lead field before regression.
        norms = l_used.std(axis=0)
        l_used = l_used / norms[None, :]
        # Regularisation: 20% of the largest effective alpha for this subject.
        alpha_max = abs(l_used.T.dot(X_used.T)).max() / len(l_used)
        alpha = 0.2 * alpha_max
        model.estimator.alpha = alpha
        model.fit(l_used, X_used.T)  # cross validation done here
        for idx, idx_used in enumerate(X_used.index.values):
            # One fitted estimator per sample (column of X_used.T).
            est_coef = np.abs(_get_coef(model.estimators_[idx]))
            est_coef /= norms
            # Collapse source-level coefficients to a per-parcel maximum.
            beta = pd.DataFrame(
                np.abs(est_coef)
            ).groupby(
                self.parcel_indices[subj_idx]).max().transpose()
            betas[idx_used] = np.array(beta).ravel()
    betas = np.array(betas)
    return betas
def baseline(X_train, y_train, X_test, model_name):
    """Fit a named baseline multi-output model and predict on X_test.

    Returns (fitted_model, predictions). Raises for an unknown model name.
    """
    simple_estimators = {
        'linear': LinearRegression,
        'ridge': Ridge,
        'lasso': Lasso,
    }
    if model_name in simple_estimators:
        regr_multirf = MultiOutputRegressor(simple_estimators[model_name]())
    elif model_name == 'xgb':
        # first run local mean smape 0.84345, public 17.47
        # too long
        regr_multirf = MultiOutputRegressor(
            RandomForestRegressor(n_estimators=100, max_depth=2, random_state=0))
    else:
        raise Exception('unknown model', model_name)
    regr_multirf.fit(X_train, y_train)
    y_pred = regr_multirf.predict(X_test)
    return regr_multirf, y_pred
def score(params):
    """Hyperopt objective: mean per-output MSE of a multi-output XGB model.

    Uses module-level DanQ_train/scores_train for fitting and
    kmer_val/scores_val for validation.
    """
    params['n_estimators'] = int(params['n_estimators'])
    print("Training with params: ")
    print(params)
    sys.stdout.flush()

    gbm_model = MultiOutputRegressor(XGBRegressor(**params))
    gbm_model.fit(DanQ_train, scores_train)
    predictions = gbm_model.predict(kmer_val)

    # Average the squared error over each of the four outputs, then average.
    total_se = (scores_val - predictions) ** 2
    per_output_mse = [np.mean(total_se[:, i]) for i in range(4)]
    score = np.mean(per_output_mse)
    print("\tScore {0}\n\n".format(score))
    return {'loss': score, 'status': STATUS_OK}
class SVM():
    """Thin wrapper around a multi-output RBF SVR with joblib persistence."""

    def __init__(self):
        self.model = MultiOutputRegressor(SVR(kernel='rbf', C=1e3, gamma=0.1))

    def fit(self, train_input, train_target):
        """Fit the wrapped regressor."""
        self.model.fit(train_input, train_target)

    def predict(self, test_input):
        """Predict targets for the given inputs."""
        return self.model.predict(test_input)

    def _filepath(self, code):
        # Models are stored as ./model/SVM<code>.pkl
        return './model/' + 'SVM' + str(code) + '.pkl'

    def save(self, code=50):
        """Persist the fitted model under ./model/SVM<code>.pkl."""
        joblib.dump(self.model, self._filepath(code))

    def load(self, code=50):
        """Load a previously saved model from ./model/SVM<code>.pkl."""
        self.model = joblib.load(self._filepath(code))
def make_bayesian_pred(df, next_week, debug=0):
    """
    Predict next week's values with multi-output Bayesian ridge regression.

    Fits on the training split returned by process_data, writes predictions
    into ``next_week``'s target columns in place, and returns ``next_week``.

    :param df: historical data frame passed through to process_data.
    :param next_week: frame to predict into; must contain the feature columns.
    :param debug: when truthy, print training diagnostics.
    """
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)

    multi_bay = MultiOutputRegressor(BayesianRidge())
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        # BUG FIX: cross_val_score takes (estimator, X, y); the original
        # passed the labels as X and the predictions as y.
        print("CV: ",
              ms.cross_val_score(multi_bay, X_train, Y_train, cv=10,
                                 scoring='neg_mean_squared_error'))
    return next_week
class DTRmodel:
    """AdaBoost-boosted decision trees wrapped for multi-output regression."""

    def __init__(self, fl, max_depth=8, num_est=300):
        """Build the model from a feature-loader ``fl``.

        :param fl: object exposing labels_dim, labels_scaler,
            normalise_labels and the feature/label matrices.
        :param max_depth: depth of each boosted decision tree.
        :param num_est: number of AdaBoost boosting stages.
        """
        self.labels_dim = fl.labels_dim  # each task has a 1-D output
        self.labels_scaler = fl.labels_scaler
        self.model = MultiOutputRegressor(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, save_mode=False, plot_name=None):
        """Fit on fl's normalised features; returns the fitted sklearn model."""
        features = fl.features_c_norm
        labels = fl.labels_norm if self.normalise_labels else fl.labels
        self.model.fit(features, labels)
        return self.model

    def eval(self, eval_fl):
        """Predict on eval_fl and return (y_pred, mse, mse_norm).

        mse_norm is -1 when labels were not normalised during training.
        """
        y_pred = self.model.predict(eval_fl.features_c_norm)
        if self.labels_dim == 1:
            # Keep a 2-D shape for the single-output case.
            y_pred = y_pred[:, None]
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, y_pred)
            mse = mean_squared_error(
                eval_fl.labels, self.labels_scaler.inverse_transform(y_pred))
        else:
            mse_norm = -1
            mse = mean_squared_error(eval_fl.labels, y_pred)
        return y_pred, mse, mse_norm
def train_model(self, params):
    """Hyperopt objective for multi-output gradient-boosted regression.

    ``params`` keys: loss, learning_rate, n_estimators, criterion,
    max_depth, max_features. Records the combination in self.tr_hist and
    returns {'loss': test-set MSE, 'status': STATUS_OK}.
    """
    model = MOR(skGBR(loss=params['loss'],
                      learning_rate=params['learning_rate'],
                      n_estimators=int(params['n_estimators']),
                      criterion=params['criterion'],
                      max_depth=int(params['max_depth']),
                      max_features=params['max_features']))

    # Print current combination
    print('Current GBR combination: {}'.format(params))

    # Flat versions of y (power/flux distribution)
    y_tr_fl, y_te_fl = self.flat_y()

    model.fit(self.x_train, y_tr_fl)
    hyp_loss = sklmse(y_te_fl, model.predict(self.x_test))
    self.tr_hist.update_history(params, hyp_loss, model)
    return {'loss': hyp_loss, 'status': STATUS_OK}
def train_model(self, params):
    """Hyperopt objective for multi-output NuSVR regression.

    ``params`` keys: kernel ('linear' | 'rbf' | 'sigmoid' | 'poly'), nu, C,
    plus gamma (rbf/sigmoid/poly) and degree (poly only).
    Records the combination in self.tr_hist and returns
    {'loss': test-set MSE, 'status': STATUS_OK}.
    :raises ValueError: for an unsupported kernel name.
    """
    kernel = params['kernel']
    nu = params['nu']
    C = params['C']

    # Instantiate SVR with only the parameters the kernel uses.
    if kernel in ['linear']:
        model = MOR(NuSVR(C=C, nu=nu, kernel=kernel))
    elif kernel in ['rbf', 'sigmoid']:
        gamma = params['gamma']
        model = MOR(NuSVR(C=C, nu=nu, kernel=kernel, gamma=gamma))
    elif kernel in ['poly']:
        gamma = params['gamma']
        degree = params['degree']
        model = MOR(
            NuSVR(C=C, nu=nu, kernel=kernel, degree=degree, gamma=gamma))
    else:
        # BUG FIX: an unknown kernel previously fell through and crashed
        # later with UnboundLocalError on `model`.
        raise ValueError('unsupported kernel: {}'.format(kernel))

    # Print current combination
    print('Current SVR combination: {}'.format(params))

    # Flat versions of y (power/flux distribution)
    y_tr_fl, y_te_fl = self.flat_y()

    model.fit(self.x_train, y_tr_fl)
    y_predict = model.predict(self.x_test)
    hyp_loss = sklmse(y_te_fl, y_predict)
    self.tr_hist.update_history(params, hyp_loss, model)
    return {'loss': hyp_loss, 'status': STATUS_OK}
def training_with_cross_validation(npzfile_path='datasets/0507-all-110-results.npz', verbose=0):
    """Fit a multi-output random forest mapping allocations to RPS.

    Loads arrays from ./simulator/<npzfile_path>, cross-validates on a 90/10
    split, then fits and scores the regressor. Relies on module-level
    n_estimators, max_depth and random_state.
    """
    npzfile = np.load('./simulator/' + npzfile_path, allow_pickle=True)
    alloc, rt_50, rt_99, rps = (npzfile['alloc'], npzfile['rt_50'],
                                npzfile['rt_99'], npzfile['rps'])

    # Pre-processing: replace NaNs so the regressor sees finite targets only.
    rps = np.nan_to_num(rps.astype(float))
    # random_state=42 fixes the split; it is not a regressor hyper-parameter.
    X_train, X_test, y_train, y_test = train_test_split(
        alloc, rps, test_size=0.1, random_state=42)
    if verbose:
        print("X_train {} => y_train {}".format(X_train.shape, y_train.shape))
        print("X_test {} => y_test {}".format(X_test.shape, y_test.shape))

    regr = MultiOutputRegressor(RandomForestRegressor(
        n_estimators=n_estimators, max_depth=max_depth,
        random_state=random_state))
    cv_scores = cross_val_score(regr, X_train, y_train, cv=5, n_jobs=4)
    np.set_printoptions(precision=4, suppress=True)
    if verbose:
        print("5-fold cross validation scores:\n", cv_scores)

    # NOTE(review): the final fit/score use only the first 7 columns while
    # CV above uses all of them — confirm this asymmetry is intentional.
    regr.fit(X_train[:, 0:7], y_train[:, 0:7])
    score = regr.score(X_test[:, 0:7], y_test[:, 0:7])
    if verbose:
        print("R^2 score of regressor: %.4f" % score)
    return regr
def grant_predictor(onu_id,onu_df,window,predict,features,model,metric): index=0 # window start index_max = 0 # prediction end # list with metrics of each prediction in different observation windows metric_list = [] reg = MultiOutputRegressor(model)#Implement the model while index+window < len(onu_df): interval=index+window # window final position df_tmp = onu_df.iloc[index:interval] # training dataset if interval+predict < len(onu_df): # check if prediction doesnt overflow input data index_max = interval+predict else: index_max = len(onu_df)-1 # check if features evaluated is simple(counter) else counter+timestamp if len(features) == 1: X_pred = np.array(onu_df[features].iloc[interval:index_max]).reshape(-1,1) if len(X_pred) == 0: break # fitting the model reg.fit(np.array( df_tmp[features] ).reshape(-1,1) , df_tmp[['start','end']]) else: X_pred = onu_df[features].iloc[interval:index_max] if len(X_pred) == 0: break # fitting the model reg.fit(df_tmp[features] , df_tmp[['start','end']]) # make prediction pred = reg.predict(X_pred) # real values to compare with prediction Y_true = onu_df[['start','end']].iloc[interval:index_max] # metric calculation metric_list.append(metric(Y_true, pred,multioutput='uniform_average')) # shift past observations window in p positions index += predict return metric_list
def run_multi_output_regressor(X, y):
    """5-fold CV of a multi-output linear regressor scored by rounded accuracy."""
    total_acc = np.zeros(shape=(y.shape[1]))
    kf = KFold(n_splits=5)
    for fold, (train_index, valid_index) in enumerate(kf.split(X)):
        x_train, x_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]

        # Train classifier
        regressor = MultiOutputRegressor(LinearRegression())
        regressor.fit(x_train, y_train)

        # Round real-valued predictions before the exact-match accuracy.
        y_pred = np.rint(regressor.predict(x_valid))
        acc = accuracy_score(y_valid, y_pred)
        print(f"Iteration {fold+1}: L1 = {acc}")
        total_acc = total_acc + acc
    print(f"Average accuracy = {total_acc/kf.get_n_splits()}")
    return total_acc
def stratCV(model, nfolds, train_X, train_Y, output_name, **params):
    """Multilabel-stratified CV; returns the per-fold log-loss scores.

    ``model`` is an estimator class instantiated with ``params`` per fold.
    Dumps (model, scores) to joblib_model_<output_name>.pkl.
    """
    splitter = MultilabelStratifiedKFold(n_splits=nfolds, shuffle=True)
    scores = []
    for train_index, valid_index in splitter.split(train_X, train_Y):
        print("TRAIN:", train_index, "VALID:", valid_index)
        X_train, X_valid = train_X[train_index], train_X[valid_index]
        Y_train, Y_valid = train_Y[train_index], train_Y[valid_index]

        fold_model = MultiOutputRegressor(model(**params))
        fold_model.fit(X_train, Y_train)
        y_score = log_loss_metric(Y_valid, fold_model.predict(X_valid))
        print(y_score)
        scores.append(y_score)

    # NOTE(review): this saves the estimator *class* plus the scores, not the
    # fitted per-fold models — confirm that is the intent.
    joblib_file = "joblib_model_{}.pkl".format(output_name)
    joblib.dump((model, scores), joblib_file)
    return scores
class ML:
    """Multi-output gradient-boosting regressor with joblib persistence."""

    def __init__(self):
        self.model = MultiOutputRegressor(GradientBoostingRegressor())

    def train(self, x, y):
        """Fit the wrapped regressor."""
        self.model.fit(x, y)

    def predict(self, x):
        """Predict targets for the given inputs."""
        return self.model.predict(x)

    @staticmethod
    def mse(x, y):
        """Mean squared error between two target arrays."""
        return mean_squared_error(x, y)

    def save(self, model_file):
        """Persist the fitted model to ``model_file``."""
        joblib.dump(self.model, model_file)

    def load(self, model_file):
        """Load a previously saved model from ``model_file``."""
        self.model = joblib.load(model_file)
def train_stack_model(
    xtrain: Union[np.ndarray, pd.DataFrame],
    ytrain: Union[np.ndarray, pd.DataFrame],
    verbose: int = 0,
    n_jobs: int = 1,
    order: Tuple[str, str] = ("rf", "lr"),
    lr_params: Optional[Dict] = None,
    rf_params: Optional[Dict] = None,
) -> BaseEstimator:
    """Train a multi-output stacking regressor of a random forest and a
    linear regression.

    :param order: ("rf", "lr") stacks the forest under a linear final
        estimator; ("lr", "rf") the reverse.
    :param rf_params: overrides for the RandomForestRegressor constructor
        (BUG FIX: previously accepted but silently ignored).
    :param lr_params: overrides for the LinearRegression constructor
        (BUG FIX: previously accepted but silently ignored).
    :raises ValueError: for an unrecognised ``order``.
    :returns: the fitted MultiOutputRegressor.
    """
    rf_kwargs = dict(
        n_estimators=1_000,
        criterion="mse",
        n_jobs=n_jobs,
        random_state=123,
        warm_start=False,
        verbose=verbose,
    )
    if rf_params:
        rf_kwargs.update(rf_params)
    rf_estimator = RandomForestRegressor(**rf_kwargs)

    lr_estimator = LinearRegression(**(lr_params or {}))

    if order == ("rf", "lr"):
        stacking_regressor = StackingRegressor(
            estimators=[("Random Forest", rf_estimator)],
            final_estimator=lr_estimator,
        )
    elif order == ("lr", "rf"):
        stacking_regressor = StackingRegressor(
            estimators=[("Linear Regression", lr_estimator)],
            final_estimator=rf_estimator,
        )
    else:
        raise ValueError(f"unknown order: {order!r}")

    mo_regressor = MultiOutputRegressor(stacking_regressor, n_jobs=1)

    t0 = time.time()
    mo_regressor.fit(xtrain, ytrain)
    elapsed = time.time() - t0
    if verbose > 0:
        print(f"Training time: {elapsed:.3f} secs.")

    return mo_regressor
def train_nmodel(data, labels, model, is_std, names):
    """Train a multi-output model and return (model, mse, r2, cv_scores, images)."""
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.25, random_state=42)

    mor = MultiOutputRegressor(model)
    mor.fit(x_train, y_train)
    y_pred = mor.predict(x_test)

    mse, r2 = get_metrics(y_test, y_pred, labels, is_std)
    cvs = cross_val_score(mor, data, labels, cv=4,
                          scoring='neg_mean_squared_error')
    print(y_pred)
    print(x_train)

    # NOTE(review): this graphs the first len(names) ROWS of the test split;
    # per-target graphs would index columns instead — confirm intent.
    images = [create_graph(y_test[i], y_pred[i], names[i])
              for i in range(0, len(names))]
    return mor, mse, r2, cvs, images
# Create a random dataset rng = np.random.RandomState(1) X = np.sort(200 * rng.rand(600, 1) - 100, axis=0) y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T y += (0.5 - rng.rand(*y.shape)) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=400, random_state=4) max_depth = 30 regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth, random_state=0)) regr_multirf.fit(X_train, y_train) regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2) regr_rf.fit(X_train, y_train) # Predict on new data y_multirf = regr_multirf.predict(X_test) y_rf = regr_rf.predict(X_test) # Plot the results plt.figure() s = 50 a = 0.4 plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k', c="navy", s=s, marker="s", alpha=a, label="Data") plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',