def train_ensemble_predictor(self, data: np.ndarray, labels: np.ndarray,
                             predictor: str = None, model_params: str = None):
    try:
        model_params = json.loads(model_params)
    except json.decoder.JSONDecodeError:
        # Fall back to YAML; safe_load avoids executing arbitrary tags.
        model_params = yaml.safe_load(model_params)
    model = self.MODELS[predictor](**model_params)
    if predictor == 'SVR':
        # If the model is an SVR, extend its functionality
        # to multi-target regression:
        model = MultiOutputRegressor(model)
    # Flatten the (models, samples, classes) prediction cube into a
    # 2-D feature matrix of shape (samples, models * classes).
    models_count, samples, classes = data.shape
    data = data.swapaxes(0, 1).reshape(samples, models_count * classes)
    self.predictor = model.fit(data, labels)
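# Hedged usage sketch (not from the original source): assumes `self.MODELS`
# maps names like 'SVR' to estimator classes and that `data` stacks the
# per-model class scores. The owner class name and shapes are illustrative.
ensemble = EnsemblePredictor()                    # hypothetical owner class
data = np.random.rand(3, 100, 5)                  # (models, samples, classes)
labels = np.random.rand(100, 2)                   # two regression targets
ensemble.train_ensemble_predictor(data, labels,
                                  predictor='SVR',
                                  model_params='{"C": 1.0}')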
def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)
def test_diff_detector_threshold(mode: str, n_features_x: int, n_features_y: int):
    """
    Basic construction logic of thresholds_ attribute in the
    DiffBasedAnomalyDetector and DiffBasedKFCVAnomalyDetector
    """
    X = np.random.random((300, n_features_x))
    y = np.random.random((300, n_features_y))

    base_estimator = MultiOutputRegressor(estimator=LinearRegression())
    if mode == "tscv":
        model = DiffBasedAnomalyDetector(base_estimator=base_estimator)
    elif mode == "kfcv":
        model = DiffBasedKFCVAnomalyDetector(base_estimator=base_estimator)

    # Model has its own implementation of cross_validate
    assert hasattr(model, "cross_validate")

    # When initialized it should not have a threshold calculated.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    model.fit(X, y)

    # Until it has done cross validation, it has no threshold.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    # Calling cross_validate should set the thresholds.
    model.cross_validate(X=X, y=y)

    # Now we have calculated thresholds based on cross validation folds.
    assert hasattr(model, "feature_thresholds_")
    assert hasattr(model, "aggregate_threshold_")
    assert isinstance(model.feature_thresholds_, pd.Series)
    assert len(model.feature_thresholds_) == y.shape[1]
    assert all(model.feature_thresholds_.notna())

    if not isinstance(model, DiffBasedKFCVAnomalyDetector):
        assert hasattr(model, "feature_thresholds_per_fold_")
        assert hasattr(model, "aggregate_thresholds_per_fold_")
        assert isinstance(model.feature_thresholds_per_fold_, pd.DataFrame)
        assert isinstance(model.aggregate_thresholds_per_fold_, dict)
def find_best_params(train_data, train_labels, test_data, test_labels):
    test_len = len(test_data)

    # Search space where the best params will be chosen
    c_values = [
        0.0000001, 0.0000005, 0.000001, 0.000005, 0.00001, 0.00005,
        0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5,
        1.0, 10.0, 50.0, 100.0, 500.0, 1000.0, 5000.0, 10000.0, 50000.0
    ]
    c_val_len = len(c_values)
    eps_values = [
        0.0000001, 0.0000005, 0.000001, 0.000005, 0.00001, 0.00005,
        0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 10.0
    ]
    eps_val_len = len(eps_values)

    # Track the lowest combined RMSE seen so far and its parameter indices.
    min_rmse_sum = 1e10
    c_idx = -1
    eps_idx = -1
    for i in range(c_val_len):
        for j in range(eps_val_len):
            svm_reg = svm.SVR(C=c_values[i], epsilon=eps_values[j])
            pred_labels = MultiOutputRegressor(svm_reg).fit(
                train_data, train_labels).predict(test_data)
            rmse_lat = 0.0
            rmse_long = 0.0
            for k in range(test_len):
                rmse_lat += (pred_labels[k][0] - test_labels.iloc[k, 0]) ** 2
                rmse_long += (pred_labels[k][1] - test_labels.iloc[k, 1]) ** 2
            rmse_lat = math.sqrt(rmse_lat / test_len)
            rmse_long = math.sqrt(rmse_long / test_len)
            if rmse_lat + rmse_long < min_rmse_sum:
                min_rmse_sum = rmse_lat + rmse_long
                c_idx = i
                eps_idx = j

    print('Best C', c_values[c_idx])
    print('Best EPS', eps_values[eps_idx])
def baselineModels(model_name):
    if model_name == 'REG':
        model = LinearRegression()
    elif model_name == 'SVR':
        model = SVR(cache_size=1000)
    elif model_name == 'TREE':
        model = RandomForestRegressor()
    elif model_name == 'ENSEMBLE':
        # Ensemble of linear regression, SVR, and random forest
        model = []  # list of models
        model.append(LinearRegression())
        model.append(SVR())
        model.append(RandomForestRegressor())
    if prediction_type == 'multi':
        # MultiOutputRegressor expects a single estimator, so when an
        # ensemble (list) was built, wrap each member individually.
        if isinstance(model, list):
            model = [MultiOutputRegressor(m, n_jobs=-1) for m in model]
        else:
            model = MultiOutputRegressor(model, n_jobs=-1)
    return model
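# Hedged usage sketch: assumes `prediction_type` is a module-level flag read
# by baselineModels above; the call sites are illustrative.
prediction_type = 'multi'
regressor = baselineModels('SVR')      # MultiOutputRegressor around SVR
members = baselineModels('ENSEMBLE')   # list of individually wrapped models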
def adaMultiple(X, y):
    # score = make_scorer(mean_squared_error)
    temp_cls_ = AdaBoostRegressor()
    # Parameters of the wrapped estimator are addressed through the
    # `estimator__` prefix that MultiOutputRegressor exposes.
    parameters = {
        'estimator__n_estimators': [50, 60, 70, 80],
        'estimator__learning_rate': [0.01, 0.1, 1],
    }
    param_tuner_ = GridSearchCV(MultiOutputRegressor(temp_cls_),
                                param_grid=parameters)
    param_tuner_.fit(X, y)
    cls = param_tuner_.best_estimator_.fit(X, y)
    return cls
def base_estimator(self, value):
    # Build `base_estimator` if a string was given
    if isinstance(value, str):
        value = cook_estimator(
            value, space=self.space,
            random_state=self.rng.randint(0, np.iinfo(np.int32).max)
        )

    # Check that it is a regressor
    if not is_regressor(value) and value is not None:
        raise ValueError(f"`base_estimator` must be a regressor. Got {value}")

    # Treat per-second acquisition functions specially
    is_multi_regressor = isinstance(value, MultiOutputRegressor)
    if self.acq_func.endswith("ps") and not is_multi_regressor:
        value = MultiOutputRegressor(value)

    self._base_estimator = value
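# Context sketch (hedged, based on scikit-optimize conventions): acquisition
# functions ending in "ps" (e.g. "EIps") optimize expected improvement per
# second, so the surrogate must predict two targets, the objective value and
# the log of the evaluation time; wrapping in MultiOutputRegressor supplies
# both. All names below are illustrative assumptions.
y_two_targets = np.column_stack([objective_values, np.log(eval_times)])
surrogate = MultiOutputRegressor(base_regressor)
surrogate.fit(X_observed, y_two_targets)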
def fit(self, X, y):
    X, y = np.array(X), np.array(y)
    for i, (train_idx, test_idx) in enumerate(self.folds.split(X)):
        # print("Fold #%u" % (i + 1))
        # print("=========================================")
        X_train, y_train = X[train_idx], y[train_idx]
        X_test, y_test = X[test_idx], y[test_idx]
        # Keep the (error, model) pair with the lowest MAE on this fold.
        best = (float('inf'), None)
        for num_features in self.FEATURES:
            cf = MultiOutputRegressor(RandomForestRegressor(
                max_features=num_features, n_estimators=100, n_jobs=-1))
            cf.fit(X_train, y_train)
            y_pred = cf.predict(X_test)
            error = mean_absolute_error(y_test, y_pred)
            if error < best[0]:
                best = (error, cf)
        self.models.append(best[1])
    return self
def regression(train_x, train_label, test_x, test_label):
    clf = MultiOutputRegressor(svm.SVR(gamma='scale'))
    clf.fit(train_x, train_label)
    y_pred = pd.DataFrame(clf.predict(test_x))
    category = y_pred.shape[1]
    # pearson = np.corrcoef(test_label.iloc[:, ], y_pred, rowvar=False)
    # print(test_label.iloc[:, 0])
    # print(test_label.shape)
    # print("Pearson: ")
    # print(pearson.shape)
    RMSE = np.sqrt(mean_squared_error(test_label, y_pred,
                                      multioutput='raw_values'))
    result = []
    for i in range(0, category):
        result.append(RMSE[i])
    return result
def __init__(
    self,
    tracker: ModelTracker,
    objective: Literal["regression", "ranking"] = "regression",
    use_simple_dataset_features: bool = False,
    use_seasonal_naive_performance: bool = False,
    use_catch22_features: bool = False,
    predict: Optional[List[str]] = None,
    output_normalization: OutputNormalization = None,
    impute_simulatable: bool = False,
):
    """
    Args:
        tracker: A tracker that can be used to impute latency and number of
            model parameters into model performances. Also, it is required for
            some input features.
        objective: The optimization objective for the XGBoost estimators.
        use_simple_dataset_features: Whether to use dataset features to predict
            using a weighted average.
        use_seasonal_naive_performance: Whether to use the Seasonal Naïve nCRPS
            as a dataset feature. Requires the cacher to be set.
        use_catch22_features: Whether to use catch22 features for dataset
            statistics. Ignored if `use_simple_dataset_features` is not set.
        predict: The metrics to predict. All if not provided.
        output_normalization: The type of normalization to apply to the
            features of each dataset independently. `None` applies no
            normalization, "quantile" applies quantile normalization, and
            "standard" transforms data to have zero mean and unit variance.
        impute_simulatable: Whether the tracker should impute latency and
            number of model parameters into the returned performance object.
    """
    super().__init__(tracker, predict, output_normalization, impute_simulatable)
    self.use_ranking = objective == "ranking"
    self.config_transformer = ConfigTransformer(
        add_model_features=True,
        add_dataset_statistics=use_simple_dataset_features,
        add_seasonal_naive_performance=use_seasonal_naive_performance,
        add_catch22_features=use_catch22_features,
        tracker=tracker,
    )
    if self.use_ranking:
        base_estimator = XGBRanker(objective="rank:pairwise", nthread=4)
    else:
        base_estimator = XGBRegressor(nthread=4)
    self.estimator = MultiOutputRegressor(base_estimator)
def train_right_eye_cyl_axis_model(config):
    try:
        print("Model training started...")

        # Import the dataset
        bucket_file = get_training_data(config)
        dataset = pd.read_csv(io.BytesIO(bucket_file['Body'].read()))

        # Extract data for the right eye - cyl/axis
        columns = config["data_set_columns"]["right_eye_cyl_axis"]
        right_eye_dataset = pd.DataFrame(dataset, columns=columns)

        # Remove duplicate rows if any exist
        duplicates_exists = right_eye_dataset.duplicated().any()
        if duplicates_exists:
            right_eye_dataset = right_eye_dataset.drop_duplicates()

        # Map categorical data
        notes_map = {"happy": 1, "unhappy": 0}
        right_eye_dataset["notes"] = right_eye_dataset["notes"].map(notes_map)

        # Create feature matrix
        X = right_eye_dataset.iloc[:, :-3]
        # Create target matrix
        y = right_eye_dataset.iloc[:, 7:9]

        # Split dataset into train and test sets
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42)

        # SVR - Train the model
        from sklearn.svm import SVR
        from sklearn.multioutput import MultiOutputRegressor
        regressor = MultiOutputRegressor(SVR(kernel="linear"), n_jobs=-1)
        regressor.fit(X_train, y_train)

        print("Model training done.")
        return list(X.columns), regressor
    except Exception as e:
        print(str(e))
        return None, None
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10, objective="reg:squarederror",
                         verbosity=0, **parameters))

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
def first_stage():
    return GridSearchCVList(
        [
            LinearRegression(),
            WeightedMultiTaskLasso(alpha=0.05, fit_intercept=True,
                                   tol=1e-6, random_state=123),
            RandomForestRegressor(n_estimators=100, max_depth=3,
                                  min_samples_leaf=10, random_state=123),
            MultiOutputRegressor(
                GradientBoostingRegressor(n_estimators=20, max_depth=3,
                                          min_samples_leaf=10,
                                          random_state=123))
        ],
        param_grid_list=[{}, {}, {}, {}],
        cv=3,
        iid=True)
def XGBoost_mod(self, daily_df, interval_forecast):
    test_df = daily_df.loc['Total'].T
    final_df = test_df.copy()
    fixed_interval = 5

    # Build a lag matrix: each shift adds one more past observation column.
    for i in range(fixed_interval + interval_forecast):
        final_df = pd.concat([test_df.shift(i + 1), final_df], axis=1)
    final_df = final_df.iloc[fixed_interval + interval_forecast:, 1:]
    final_df.columns = [i for i in range(fixed_interval + interval_forecast)]

    model = xgb.XGBRegressor(n_estimators=300, early_stopping_rounds=50,
                             verbosity=0)
    x, y = (final_df.iloc[:, :-interval_forecast],
            final_df.iloc[:, -interval_forecast:])
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
    multi_model = MultiOutputRegressor(model).fit(x_train, y_train)

    x_forecast = pd.DataFrame(final_df.iloc[-1, interval_forecast:].tolist(),
                              index=x_train.columns).T
    pred = multi_model.predict(x_forecast)
    return pred[0]
def generate_joint_model(single_model):
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)

    score_train = model.score(X_train, Y_train)
    print('Score of train', round(score_train * 100, 1), "%")
    score = model.score(X_test, Y_test)
    print('Score of test', round(score * 100, 1), "%")

    model_path = model_folder + r"/" + \
        str(round(score, 3)).replace('.', '_') + r"_" + \
        str(model.get_params()['estimator']).split('(')[0] + \
        '.joblib'
    joblib.dump(model, model_path)
    print("Saved model file", model_path)
    return model, model_path
def objective(space):
    global X, Xt, y, yt
    clf = MultiOutputRegressor(
        XGBRegressor(n_estimators=int(space['n_estimators']),
                     max_depth=int(space['max_depth']),
                     gamma=space['gamma'],
                     reg_alpha=space['reg_alpha'],
                     reg_lambda=space['reg_lambda'],
                     min_child_weight=space['min_child_weight']))
    clf.fit(X, y, verbose=False)
    pred = clf.predict(Xt)
    # Mean squared error is the loss to minimize, not an accuracy score.
    loss = mean_squared_error(yt, pred)
    print("SCORE:", loss)
    return {'loss': loss, 'status': STATUS_OK}
def train_diff_levels(noise, size):
    # Load data with the specified amount of noise and number of examples.
    data = Data(noise, size,
                imageFiles='./datasets/noise_0_alt/train_data/regular/*.png',
                labelFiles='./datasets/noise_0_alt/train_data/regular/*.npy')

    # Train the SVR.
    svr = LinearSVR(tol=0.1, verbose=10)
    multi_svr = MultiOutputRegressor(svr, n_jobs=-1)
    multi_svr.fit(data.x / 255.0, data.y)

    # Save the trained model.
    pickle.dump(
        multi_svr,
        open("saved_models/svr/noise_{0}_training_{1}.ckpt".format(noise, size),
             'wb'))
def load_SVM():
    '''
    Loads Support Vector Machine models and gives a name for the output files.

    Parameters:
        None

    Returns:
        model_name : (str) Name of the model for the output file.
        clf : (Classifier) Building and floor classifier.
        regr : (Regressor) Longitude and latitude regressor.
    '''
    model_name = "Support Vector Machine"
    clf = SVC(C=100, kernel="linear", max_iter=1000)
    clf = MultiOutputClassifier(clf)
    regr = SVR(C=100, kernel="linear", max_iter=1000)
    regr = MultiOutputRegressor(regr)
    return model_name, clf, regr
def randomSearch(base_model, random_grid):
    random = RandomizedSearchCV(MultiOutputRegressor(base_model),
                                param_distributions=random_grid,
                                n_iter=100, cv=3, verbose=2,
                                random_state=42, n_jobs=-1)
    random.fit(train_X, train_y)
    print(random.best_params_)
    best_random = random.best_estimator_

    pred_y_train = best_random.predict(train_X)
    print_scores(train_y_array, pred_y_train)
    pred_y_test = best_random.predict(test_X)
    print_scores(test_y_array, pred_y_test)
    pred_y_dev = best_random.predict(dev_X)
    print_scores(dev_y_array, pred_y_dev)
def crossValidationMLPR(X, Y):
    """Tries several hyperparameter possibilities."""
    # Split the set into training and validation sets
    print("***Splitting off the validation set***")
    x_train, x_validation, y_train, y_validation_txt = train_test_split(
        X, Y, stratify=Y, test_size=0.2, shuffle=True)
    y_train, y_validation = (transformerGranuArgi(y_train),
                             transformerGranuArgi(y_validation_txt))

    print('***Defining the parameters to test***')
    param = {
        'hidden_layer_sizes': [
            tuple(np.random.randint(20, 35, np.random.randint(3, 5, 1)))
            for _ in range(5)
        ]
    }

    print('***Defining the models to train***')
    mlpr = [
        MLPRegressor(solver='adam', max_iter=1000, alpha=1e-5,
                     activation='tanh',
                     hidden_layer_sizes=param['hidden_layer_sizes'][i])
        for i in range(len(param['hidden_layer_sizes']))
    ]
    multioutput_rna = [MultiOutputRegressor(modele) for modele in mlpr]

    # Score of correct results on the validation set
    resultat_sur_validation = [
        0 for _ in range(len(param['hidden_layer_sizes']))
    ]
    for i, modele in enumerate(multioutput_rna):
        print(f"[Training model {i}] Neuron layers: "
              f"{param['hidden_layer_sizes'][i]}")
        modele.fit(x_train, y_train)
        print(modele.score(x_validation, y_validation))
        y_res = modele.predict(x_validation)
        y_res = conversionPredictionSol(y_res)
        print(scorePrediction(y_res, np.array(y_validation_txt)))
        print('\n')
def runBaseLineRegression(model_params, data, estimator):
    # regr = MultiOutputRegressor(sklearn.linear_model.LinearRegression())
    regr = MultiOutputRegressor(estimator)
    # regr = MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
    # regr = MultiOutputRegressor(sklearn.linear_model.Lasso())

    # data
    AP_train, TRP_train = data[0]
    AP_dev, TRP_dev = data[1]
    if model_params["DirectionForward"]:
        X_train, Y_train, X_dev, Y_dev = TRP_train, AP_train, TRP_dev, AP_dev
    else:
        X_train, Y_train, X_dev, Y_dev = AP_train, TRP_train, AP_dev, TRP_dev
        model_params["OutputNames"], model_params["InputNames"] = \
            model_params["InputNames"], model_params["OutputNames"]

    regr.fit(X_train, Y_train)
    Y_dev_pred = regr.predict(X_dev)
    Y_train_pred = regr.predict(X_train)

    if model_params["DirectionForward"]:
        # train
        mse_total_train = customUtils.mse_p(ix=(3, 6), Y_pred=Y_train_pred,
                                            Y_true=Y_train)
        # dev
        mse_total_dev = customUtils.mse_p(ix=(3, 6), Y_pred=Y_dev_pred,
                                          Y_true=Y_dev)
    else:
        mse_total_train = mse(Y_train, Y_train_pred, multioutput='raw_values')
        mse_total_dev = mse(Y_dev, Y_dev_pred, multioutput='raw_values')

    model_location = os.path.join('models',
                                  model_params["model_name"] + '.json')
    with open(os.path.join('model_params',
                           model_params["model_name"] + '.json'), 'w') as fp:
        json.dump(model_params, fp, sort_keys=True)
    _ = run_eval_base(model_location, dataset="train",
                      email=model_params["email"])
    _ = run_eval_base(model_location, dataset="test",
                      email=model_params["email"])
    mse_total = run_eval_base(model_location, dataset="dev",
                              email=model_params["email"])
    return (mse_total_train.tolist(), mse_total_dev.tolist(),
            mse_total_train.sum(), mse_total_dev.sum())
def decision_function(self, X):
    X = X.copy()
    X.iloc[:, :-2] *= 1e12
    L, parcel_indices_L, subj_dict = self._get_lead_field_info()

    # Use only lead fields of the subjects found in X
    subj_dict = dict((k, subj_dict[k]) for k in np.unique(X['subject']))
    self.lead_field, self.parcel_indices = [], []
    subj_dict_x = {}
    for idx, s_key in enumerate(subj_dict.keys()):
        subj_dict_x[s_key] = idx
        self.lead_field.append(L[subj_dict[s_key]])
        self.parcel_indices.append(parcel_indices_L[subj_dict[s_key]])
    X['subject_id'] = X['subject'].map(subj_dict_x)
    X = X.astype({'subject_id': 'int32'})

    model = MultiOutputRegressor(self.model, n_jobs=self.n_jobs)

    X = X.reset_index(drop=True)
    betas = np.empty((len(X), 0)).tolist()
    for subj_idx in np.unique(X['subject_id']):
        l_used = self.lead_field[subj_idx]
        X_used = X[X['subject_id'] == subj_idx]
        X_used = X_used.iloc[:, :-2]

        norms = l_used.std(axis=0)
        l_used = l_used / norms[None, :]

        alpha_max = abs(l_used.T.dot(X_used.T)).max() / len(l_used)
        alpha = 0.2 * alpha_max
        model.estimator.alpha = alpha
        model.fit(l_used, X_used.T)  # cross validation done here

        for idx, idx_used in enumerate(X_used.index.values):
            est_coef = np.abs(_get_coef(model.estimators_[idx]))
            est_coef /= norms
            beta = pd.DataFrame(
                np.abs(est_coef)
            ).groupby(self.parcel_indices[subj_idx]).max().transpose()
            betas[idx_used] = np.array(beta).ravel()
    betas = np.array(betas)
    return betas
def train_consumer():
    cdf = pd.read_csv(CONSUMER_TRAINING)
    xs = ['risk', 'delta_risk', 'grat_payoff', 'delta_grat_payoff',
          'inv_payoff', 'delta_inv_payoff', 'surface_area_risk_factor',
          'delta_surface_area_risk_factor']
    ys = ['GREED', 'FOCUS', 'SPEND', 'INVEST']
    cx, cy = cdf[xs], cdf[ys]

    # Use a multi-output regressor
    model = MultiOutputRegressor(
        GradientBoostingRegressor(random_state=0)).fit(cx, cy)

    # Clear CMODEL_FILE
    open(CMODEL_FILE, 'w').close()
    pickle.dump(model, open(CMODEL_FILE, 'wb'))
def evaluate(individual):
    C = 1 + 2 * abs(individual[0]) * 1.00e03
    epsilon = 0.1 + abs(individual[1]) * 0.1 + 0.02
    gamma = abs(individual[2]) * 0.1 + 0.02

    multi_regr_rbf = MultiOutputRegressor(
        SVR(kernel='rbf', C=C, epsilon=epsilon, gamma=gamma))
    model = multi_regr_rbf.fit(x_train, y_train)
    output = multi_regr_rbf.predict(x_test)

    r_squared = abs(multi_regr_rbf.score(x_test, y_test))
    if r_squared > 1:
        r_squared = 0

    params = (r_squared,
              e.AkaikeInformationCriterion_c(output),
              e.BayesianInformationCriterion(output),
              e.PRESS(output, y_test),
              e.MAPE(output, y_test),
              e.StructuralRiskMinimisation(output, y_test),
              e.FinalPredictionError(output, y_test),
              e.RMSErrors(output, y_test))
    print("The parameters are: ", params)
    return params
def gbr_model(yvar, n_estimators, max_depth, min_samples_leaf,
              min_samples_split, max_features, loss):
    if max_features != 'auto':
        max_features = int(max_features)
    n_estimators, min_samples_leaf, min_samples_split, max_depth = \
        int(n_estimators), int(min_samples_leaf), int(min_samples_split), \
        int(max_depth)

    reg = GradientBoostingRegressor(random_state=42,
                                    max_depth=max_depth,
                                    n_estimators=n_estimators,
                                    max_features=max_features,
                                    min_samples_leaf=min_samples_leaf,
                                    loss=loss,
                                    min_samples_split=min_samples_split)
    # GradientBoostingRegressor supports only a single target, so wrap it
    # when y has more than one column.
    if yvar.shape[1] == 1:
        reg_trans = reg
    else:
        reg_trans = MultiOutputRegressor(reg, n_jobs=-1)
    return reg_trans
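# Hedged usage sketch (argument values are illustrative; the valid `loss`
# name depends on the installed scikit-learn version):
y_multi = np.random.rand(100, 3)
model = gbr_model(y_multi, n_estimators=200, max_depth=4,
                  min_samples_leaf=2, min_samples_split=4,
                  max_features='auto', loss='squared_error')
# isinstance(model, MultiOutputRegressor) -> True for multi-column targets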
def make_bayesian_pred(df, next_week, debug=0):
    """
    This method creates predictions using Bayesian regression.
    """
    space = {
        'estimator__alpha_1': [1e-10, 1e-5, 1],
        'estimator__alpha_2': [1e-10, 1e-5, 1],
        'estimator__lambda_1': [1e-10, 1e-5, 1],
        'estimator__lambda_2': [1e-10, 1e-5, 1],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False]
    }
    params = {
        'estimator__alpha_1': [1e-10, 1e-5, 1, 5],
        'estimator__alpha_2': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_1': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_2': [1e-10, 1e-5, 1, 5],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__fit_intercept': [True, False]
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_bay = MultiOutputRegressor(BayesianRidge())
    # multi_bay.set_params(**params)
    # best_random = grid_search(multi_bay, space, next_week, 3, X_train, Y_train)
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print("CV: ",
              ms.cross_val_score(multi_bay, Y_train, y_pred_untrain, cv=10,
                                 scoring='neg_mean_squared_error'))
    return next_week
def train_model(X, Y, layers=None, weight=False, with_onsets=False):
    """
    Create and train a model from the given data.

    Parameters
    ==========
    X : np.ndarray
        An N x (history + num_features + 2) ndarray containing the N data
        points on which to train.

    Y : np.ndarray
        A length-N array, containing the targets for each data point. Or, an
        (N, 2) target ndarray, if with_onsets is True.

    layers : list(int)
        The hidden layer sizes for the trained network. Defaults to None,
        which is logistic regression.

    weight : boolean
        True to have the model output prior weights, and False to have it
        output the prior directly. Defaults to False.

    with_onsets : boolean
        True to output presence and onset values. False for only presence.

    Returns
    =======
    model : sklearn classifier
        A trained model.
    """
    if weight:
        convert_targets_to_weight(X, Y, with_onsets=with_onsets)

    if layers is None or len(layers) == 0:
        la = -1
        ac = -2
        strengths = np.abs(X[:, la] - X[:, ac])
        regressor = (MultiOutputRegressor(LogisticRegression())
                     if with_onsets else LogisticRegression())
        model = regressor.fit(X, Y, sample_weight=strengths)
    else:
        model = MLPClassifier(max_iter=1000,
                              hidden_layer_sizes=layers).fit(X, Y)

    return model
def create_model(self, C=-1, gamma=-1, epsilon=-1):
    # These checks mean the defaults chosen in the constructor are used
    # unless explicit values are passed as parameters.
    if C == -1:
        C = self.C
    if gamma == -1:
        gamma = self.gamma
    if epsilon == -1:
        epsilon = self.epsilon

    self.model = SVR(C=C, gamma=gamma, epsilon=epsilon)
    if self.output_multi:
        # With multiple y columns, wrap the SVR so it can handle them.
        multi_output_model = MultiOutputRegressor(estimator=self.model)
        self.model = multi_output_model
    print(self.model)
    return self.model
def __init__(self, fl, max_depth=8, num_est=300, chain=False):
    """
    Initialises a new DTR model.

    :param fl: fl class
    :param max_depth: Max depth of each tree
    :param num_est: Number of estimators in the ensemble of trees
    :param chain: Regressor chain (True) or independent multi-output (False)
    """
    self.labels_dim = fl.labels_dim
    self.labels_scaler = fl.labels_scaler
    if chain:
        self.model = RegressorChain(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
    else:
        self.model = MultiOutputRegressor(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
    self.normalise_labels = fl.normalise_labels
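# Context sketch (hedged, standard scikit-learn behavior): RegressorChain
# feeds each target's prediction into the next estimator, so it can exploit
# correlations between targets, while MultiOutputRegressor fits one
# independent estimator per target. Names below are illustrative.
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.linear_model import Ridge
import numpy as np

X_demo = np.random.rand(50, 4)
y_demo = np.random.rand(50, 3)
independent = MultiOutputRegressor(Ridge()).fit(X_demo, y_demo)
chained = RegressorChain(Ridge(), order=[0, 1, 2]).fit(X_demo, y_demo)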
def _check_arguments(self, base_estimator, n_initial_points, acq_optimizer,
                     dimensions):
    """Check arguments for sanity."""
    if isinstance(base_estimator, str):
        base_estimator = cook_estimator(
            base_estimator, space=dimensions,
            random_state=self.rng.randint(0, np.iinfo(np.int32).max))

    if not is_regressor(base_estimator) and base_estimator is not None:
        raise ValueError("%s has to be a regressor." % base_estimator)

    is_multi_regressor = isinstance(base_estimator, MultiOutputRegressor)
    if "ps" in self.acq_func and not is_multi_regressor:
        self.base_estimator_ = MultiOutputRegressor(base_estimator)
    else:
        self.base_estimator_ = base_estimator

    if n_initial_points < 0:
        raise ValueError("Expected `n_initial_points` >= 0, got %d"
                         % n_initial_points)
    self._n_initial_points = n_initial_points
    self.n_initial_points_ = n_initial_points

    if acq_optimizer == "auto":
        if has_gradients(self.base_estimator_):
            acq_optimizer = "lbfgs"
        else:
            acq_optimizer = "sampling"
    if acq_optimizer not in ["lbfgs", "sampling"]:
        raise ValueError("Expected acq_optimizer to be 'lbfgs' or "
                         "'sampling', got {0}".format(acq_optimizer))
    if (not has_gradients(self.base_estimator_)
            and acq_optimizer != "sampling"):
        raise ValueError("The regressor {0} should run with "
                         "acq_optimizer='sampling'.".format(
                             type(base_estimator)))
    self.acq_optimizer = acq_optimizer