def create_model(x_train, y_train, alpha):
    print("begin to train...")
    model = Ridge(alpha=alpha)
    clf1 = ensemble.BaggingRegressor(model, n_jobs=1, n_estimators=900)
    # clf2 = ensemble.AdaBoostRegressor(n_estimators=900, learning_rate=0.01)
    # clf3 = ensemble.RandomForestRegressor(n_estimators=900)
    # clf4 = ensemble.ExtraTreesRegressor(n_estimators=900)
    # print("Bagging")
    # cross_val_score lives in sklearn.model_selection; the old
    # sklearn.cross_validation module was removed in scikit-learn 0.20.
    scores = -model_selection.cross_val_score(
        model, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores1 = -model_selection.cross_val_score(clf1, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores2 = -model_selection.cross_val_score(clf2, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores3 = -model_selection.cross_val_score(clf3, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores4 = -model_selection.cross_val_score(clf4, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # print('=========================')
    print('Scores:')
    print(scores.mean())
    # print(scores1.mean())
    # print(scores2.mean())
    # print(scores3.mean())
    # print(scores4.mean())
    clf1.fit(x_train, y_train)
    print("Finish")
    return clf1
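# --- Usage sketch (not part of the original source): a minimal call to
# create_model on synthetic data, assuming the names its body relies on
# (Ridge, ensemble, model_selection) are imported from scikit-learn.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn import ensemble, model_selection

rng = np.random.RandomState(0)
x_demo = rng.rand(120, 5)                              # 120 samples, 5 features
y_demo = x_demo @ rng.rand(5) + 0.1 * rng.randn(120)
bagged_ridge = create_model(x_demo, y_demo, alpha=1.0)
print(bagged_ridge.predict(x_demo[:3]))                # predictions from the bagged Ridge ensemble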
def _doFit(self, goodData_LR, goodData_HR, weight, local):
    ''' Private function. Fits the regression tree.
    '''
    # For local regression constrain the number of tree
    # nodes (rules) - section 2.3
    if local:
        self.regressorOpt["max_leaf_nodes"] = 10
    else:
        self.regressorOpt["max_leaf_nodes"] = 30
    self.regressorOpt["min_samples_leaf"] = 10

    # If per-leaf linear regression is used then use the modified
    # DecisionTreeRegressor. Otherwise use the standard one.
    if self.perLeafLinearRegression:
        baseRegressor = \
            DecisionTreeRegressorWithLinearLeafRegression(self.linearRegressionExtrapolationRatio,
                                                          self.regressorOpt)
    else:
        baseRegressor = \
            tree.DecisionTreeRegressor(**self.regressorOpt)

    reg = ensemble.BaggingRegressor(baseRegressor, **self.baggingRegressorOpt)
    if goodData_HR.shape[0] <= 1:
        reg.max_samples = 1.0
    reg = reg.fit(goodData_HR, goodData_LR, sample_weight=weight)

    return reg
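# --- Standalone sketch (not part of the original source) of the same
# bag-a-constrained-tree pattern using plain scikit-learn pieces;
# DecisionTreeRegressorWithLinearLeafRegression is a custom class from the
# original project and is not reproduced here.
import numpy as np
from sklearn import tree, ensemble

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = X[:, 0] + 0.05 * rng.randn(200)

# Leaf constraints mirror the non-local branch above (30 leaves, >= 10 samples per leaf)
base = tree.DecisionTreeRegressor(max_leaf_nodes=30, min_samples_leaf=10)
reg = ensemble.BaggingRegressor(base, n_estimators=10)
reg.fit(X, y, sample_weight=np.ones(len(y)))
print(reg.predict(X[:2]))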
def __regressionModel(self, model, parameter):
    if model == 'DecisionTree':
        from sklearn import tree
        self.__model = tree.DecisionTreeRegressor()
    elif model == 'LinearRegression':
        from sklearn.linear_model import LinearRegression
        self.__model = LinearRegression()
    elif model == 'SVM':
        from sklearn import svm
        self.__model = svm.SVR(kernel=parameter["kernel"], C=parameter["C"])
    elif model == 'KNeighbors':
        from sklearn import neighbors
        self.__model = neighbors.KNeighborsRegressor(n_neighbors=parameter["n_neighbors"])
    elif model == 'RandomForest':
        from sklearn import ensemble
        self.__model = ensemble.RandomForestRegressor(n_estimators=parameter["n_estimators"])
    elif model == 'AdaBoost':
        from sklearn import ensemble
        self.__model = ensemble.AdaBoostRegressor(n_estimators=parameter["n_estimators"])
    elif model == 'GradientBoosting':
        from sklearn import ensemble
        self.__model = ensemble.GradientBoostingRegressor(n_estimators=parameter["n_estimators"])
    elif model == 'Bagging':
        from sklearn import ensemble
        self.__model = ensemble.BaggingRegressor(n_estimators=parameter["n_estimators"])
    elif model == 'ExtraTree':
        from sklearn.tree import ExtraTreeRegressor
        self.__model = ExtraTreeRegressor()
def run(X_train, X_test, y_train, y_test, n_estimators=10, max_samples=10):
    if len(X_train.shape) == 1:
        X_train = np.array([X_train]).T
        X_test = np.array([X_test]).T

    linregress = linear.LinearRegression
    logregress = linear.LogisticRegression
    rng = check_random_state(0)  # random state object from np.random

    print()
    print("BAG")
    print(max_samples, type(max_samples))
    # max_samples = np.float64(10)
    # print(max_samples, type(max_samples))
    # return X_train, y_train, max_samples, n_estimators, None, None, None

    # `ensamble` is the source module's (misspelled) alias for sklearn.ensemble
    ens = ensamble.BaggingRegressor(base_estimator=linregress(),
                                    random_state=rng,
                                    max_samples=int(max_samples),
                                    n_estimators=int(n_estimators)).fit(X_train, y_train)
    y_predicted = ens.predict(X_test)

    # Validation
    rmse = err.RMSE(y_predicted, y_test)
    r2 = err.Rsquare(y_predicted, y_test)
    return y_predicted, rmse, r2, None  # Last value refers to feature importance, which this model does not provide
def __init__(self, data=None, data_to_predict=None, target=None):
    """Reads in data and initializes some attributes for later

    Args:
        data: preloaded dataframe, default is None
    """
    self.data = data
    self.target_name = target
    self.model_dict = {
        'LinearRegression': lm.LinearRegression(),
        'Lasso': lm.Lasso(),
        'Ridge': lm.Ridge(),
        'RandomForestRegressor': en.RandomForestRegressor(),
        'AdaBoostRegressor': en.AdaBoostRegressor(),
        'GradientBoost': en.GradientBoostingRegressor(),
        'BaggingRegressor': en.BaggingRegressor(),
        'RandomForestClassifier': en.RandomForestClassifier()
    }
    self.features_ = []
    self.selected_features_ = []
    self.model = None
    self.cv_score_ = {}
    self.train_index = None
    self.test_index = None
    self.data_to_predict = data_to_predict
    self.predictions = None
    self.train_score_ = None
    self.test_score_ = None
    self.best_params_ = None
def Bagging_regression(self, train_attr, train_label):
    model = ensemble.BaggingRegressor(base_estimator=self.base_estimator,
                                      n_estimators=self.n_estimators,
                                      max_samples=self.max_samples,
                                      max_features=self.max_features)
    model.fit(train_attr, train_label)
    return model
def train_model(train, val, y_train, y_val, train_type, model_type, randomstate):
    if train_type == 'bagging':
        # model = bagging_models.fit(train, val, y_train, y_val, model_type, randomstate)
        base_model = DecisionTreeRegressor(random_state=randomstate)
        model = ensemble.BaggingRegressor(max_samples=0.9, max_features=1,
                                          warm_start=True, base_estimator=base_model,
                                          random_state=randomstate,
                                          n_estimators=100, n_jobs=50)
        # model = ensemble.BaggingRegressor(max_samples=0.85, max_features=1, warm_start=True, base_estimator=base_model, random_state=randomstate, n_estimators=100, n_jobs=50)
        model.fit(train, y_train)
        return model
    else:
        if model_type == 'Linear':
            model = linear_model.LinearRegression(n_jobs=3)
        elif model_type == 'SVR':
            model = svm.SVR(C=3.0, cache_size=50, degree=3, gamma='auto', kernel='rbf',
                            max_iter=-1, shrinking=True, tol=0.001, verbose=False)
            # model = svm.SVR(C=3.0, cache_size=50, degree=2, gamma='auto', kernel='sigmoid', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
        elif model_type == 'DT':
            model = DecisionTreeRegressor(random_state=randomstate)
        elif model_type == 'MLP':
            model = MLPRegressor(hidden_layer_sizes=4, activation='relu', random_state=randomstate)
        elif model_type == 'poly':
            poly = preprocessing.PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
            # Fit the polynomial expansion on the training data, then apply
            # the same mapping to the validation set.
            train = poly.fit_transform(train)
            val = poly.transform(val)
            model = linear_model.LinearRegression(n_jobs=3)
        elif model_type == 'LinearGAM':
            model = LinearGAM()
        elif model_type == 'GammaGAM':
            model = GammaGAM()
        elif model_type == 'InvGaussGAM':
            model = InvGaussGAM()
        elif model_type == 'LogisticGAM':
            model = LogisticGAM()
        elif model_type == 'PoissonGAM':
            model = PoissonGAM()
        elif model_type == 'ExpectileGAM':
            model = ExpectileGAM()
        model.fit(train, y_train)
        return model
def optimize_BaggingSVR(X_train, y_train):
    opt = modelSel.RandomizedSearchCV(
        estimator=skEn.BaggingRegressor(base_estimator=sk.SVR(cache_size=500, gamma='auto')),
        param_distributions=param_baggingSVR,
        cv=5,
        scoring=scoreFunction)
    opt.fit(X_train, y_train)
    return formatOptimal(opt.best_params_)
def bagging_train(x_train, y_train, x_test):
    # bagging
    base_model = linear_model.LassoCV(alphas=None, cv=5)
    bagging = ensemble.BaggingRegressor(base_estimator=base_model, n_estimators=10)
    bagging.fit(x_train, y_train)
    # prediction results
    pred = bagging.predict(x_test)
    return pred
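# --- Usage sketch (not part of the original source): bagging a LassoCV base
# estimator on toy data, assuming the snippet's module imports linear_model
# and ensemble from scikit-learn.
import numpy as np
from sklearn import linear_model, ensemble

rng = np.random.RandomState(42)
X = rng.randn(200, 8)
y = X[:, 0] * 3.0 - X[:, 1] + rng.randn(200) * 0.1
preds = bagging_train(X[:150], y[:150], X[150:])
print(preds[:5])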
def optimize_BaggingSVR(X_train, y_train):
    opt = modelSel.GridSearchCV(
        skEn.BaggingRegressor(base_estimator=sk.SVR(cache_size=500)),
        param_baggingSVR,
        cv=5,
        scoring=scoreFunction)
    opt.fit(X_train, y_train)
    return formatOptimal(opt.best_params_)
def __init__(self, type="linear_regression", regularization=False,
             n_estimators=100, subsample=1.0, max_depth=3, c=80, e=0.001):
    if type == "linear_regression":
        self.model = linear_model.LinearRegression(normalize=True)
    elif type == "ridge":
        self.model = linear_model.Ridge()
    elif type == "SVM":
        self.model = svm.SVR(kernel='rbf', gamma='auto', C=c, epsilon=e)
    elif type == 'XGBoost':
        # Note: despite the label, this is scikit-learn's gradient boosting,
        # not the xgboost library.
        self.model = ensemble.GradientBoostingRegressor(
            n_estimators=n_estimators, subsample=subsample, max_depth=max_depth)
    elif type == 'BaggingRegressor':
        self.model = ensemble.BaggingRegressor()
    elif type == 'RandomForest':
        self.model = ensemble.RandomForestRegressor(
            n_estimators=n_estimators, max_depth=max_depth)
    elif type == "AdaBoostRegressor":
        self.model = ensemble.AdaBoostRegressor(n_estimators=n_estimators)
    elif type == 'ExtraTreesRegressor':
        self.model = ensemble.ExtraTreesRegressor(
            n_estimators=n_estimators, max_depth=max_depth)
    elif type == 'Lasso':
        self.model = linear_model.Lasso()
    elif type == "qda":
        self.model = discriminant_analysis.QuadraticDiscriminantAnalysis()
    elif type == "lda":
        self.model = discriminant_analysis.LinearDiscriminantAnalysis()
    elif type == 'XGBoost with Bagging':
        self.model = ensemble.BaggingRegressor(
            base_estimator=ensemble.GradientBoostingRegressor(
                n_estimators=100, subsample=1.0, max_depth=3),
            n_estimators=n_estimators)
    elif type == "Gaussian Process":
        self.model = gaussian_process.GaussianProcessRegressor()
def train(XTrain, yTrain, XPredict):
    params = {'n_estimators': randint(1, 100)}
    # KFold and RandomizedSearchCV live in sklearn.model_selection; the old
    # cross_validation and grid_search modules were removed in scikit-learn 0.20,
    # and the 'mean_squared_error' scorer is now 'neg_mean_squared_error'.
    kfold = model_selection.KFold(n_splits=3)
    svr = svm.SVR(kernel='rbf', C=50, gamma=0.1)
    baggingsvr = ensemble.BaggingRegressor(svr)
    clf = model_selection.RandomizedSearchCV(baggingsvr,
                                             param_distributions=params,
                                             n_iter=10,
                                             scoring='neg_mean_squared_error',
                                             cv=kfold,
                                             n_jobs=-1)
    clf.fit(XTrain, yTrain)  # train the model in one pass
    # print(clf.best_score_, clf.best_estimator_)
    yPredict = clf.predict(XPredict)
    return yPredict, clf.best_params_
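# --- Usage sketch (not part of the original source): calling train() on
# synthetic data, assuming the module imports model_selection, svm, and
# ensemble from scikit-learn plus scipy's randint for the search space.
import numpy as np
from scipy.stats import randint
from sklearn import model_selection, svm, ensemble

rng = np.random.RandomState(7)
X = rng.rand(90, 4)
y = np.sin(X[:, 0]) + 0.05 * rng.randn(90)
y_hat, best = train(X, y, X[:5])
print(best)  # the n_estimators value chosen by the randomized search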
def declareLO():
    ridGe = Ridge()
    svR = svm.SVR(C=5, gamma=0.001)
    adaBoost = ensemble.AdaBoostRegressor()
    bagging = ensemble.BaggingRegressor()
    extraTree = ensemble.ExtraTreesRegressor()
    gradientBoost = ensemble.GradientBoostingRegressor()
    randForest = ensemble.RandomForestRegressor()
    learningObjs = [svR, ridGe, adaBoost, bagging, extraTree, gradientBoost, randForest]
    return learningObjs
def __init__(self):
    self.model_dict = {
        "SGDRegressor": linear_model.SGDRegressor(max_iter=1000),
        "HuberRegressor": linear_model.HuberRegressor(),
        "LinearRegression": linear_model.LinearRegression(),
        "LinearSVR": svm.LinearSVR(),
        "BaggingRegressor": ensemble.BaggingRegressor(),
        "AdaBoostRegressor": ensemble.AdaBoostRegressor(),
        "ExtraTreesRegressor": ensemble.ExtraTreesRegressor(),
        "RandomForestRegressor": ensemble.RandomForestRegressor(),
        "GradientBoostingRegressor": ensemble.GradientBoostingRegressor()
    }
def setup_BaggingSVR(learner_settings):
    # default values
    base_estimator = setup_SVR(learner_settings)
    n_estimators = 10
    max_samples = 1.0
    max_features = 1.0
    bootstrap = True
    bootstrap_features = False
    oob_score = False
    warm_start = False
    n_jobs = 1
    random_state = None
    verbose = 0

    # change default values
    for additional_setting in learner_settings:
        # split identifier=value, so you can identify value and the variable
        setting_value_pair = additional_setting.split("=")
        if setting_value_pair[0] == "verbose":
            if setting_value_pair[1].isnumeric():
                verbose = int(setting_value_pair[1])
        if setting_value_pair[0] == "random_state":
            random_state = int(setting_value_pair[1])
        if setting_value_pair[0] == "n_jobs":
            n_jobs = int(setting_value_pair[1])
        if setting_value_pair[0] == "warm_start":
            warm_start = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "oob_score":
            oob_score = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "bootstrap_features":
            bootstrap_features = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "bootstrap":
            bootstrap = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "max_features":
            max_features = parse_to_int_float_bool_string(setting_value_pair[1])
        if setting_value_pair[0] == "max_samples":
            max_samples = parse_to_int_float_bool_string(setting_value_pair[1])
        if setting_value_pair[0] == "n_estimators":
            n_estimators = int(setting_value_pair[1])

    return skEn.BaggingRegressor(base_estimator=base_estimator,
                                 n_estimators=n_estimators,
                                 max_samples=max_samples,
                                 max_features=max_features,
                                 bootstrap=bootstrap,
                                 bootstrap_features=bootstrap_features,
                                 oob_score=oob_score,
                                 warm_start=warm_start,
                                 n_jobs=n_jobs,
                                 random_state=random_state,
                                 verbose=verbose)
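# --- Usage sketch (hypothetical): learner settings arrive as "identifier=value"
# strings. setup_SVR and parse_to_int_float_bool_string come from the same
# source file; the minimal stand-ins below are assumptions, given only so the
# parser can be exercised in isolation.
import sklearn.svm as sk
import sklearn.ensemble as skEn

def setup_SVR(learner_settings):
    return sk.SVR()  # stand-in; the original also parses SVR-specific settings

def parse_to_int_float_bool_string(value):
    # stand-in: try int, then float, then fall back to the raw string
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value

bagging_svr = setup_BaggingSVR(["n_estimators=25", "max_samples=0.8"])
print(bagging_svr.n_estimators)  # 25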
def Call_Bagging_Reg(X_train, y_train, X_test, y_test):
    clf = ensemble.BaggingRegressor()
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    print("Bagging Regressor Score: ", clf.score(X_test, y_test))
    R2 = r2_score(y_test, prediction)
    # plot prediction
    plot_regression_predictions(y_test, 'Bagging Regressor', prediction)
    return R2
def mapping_reg(s):
    rgr = None
    context_str = ""
    context_img = ""
    params = {}
    if s == 'LR':
        rgr = LinearRegression()
        context_str = "Linear Regressor"
        params = {'n_jobs': np.arange(1, 11, 1)}
    if s == 'DT':
        # criterion "mse" was renamed to "squared_error" in scikit-learn 1.0
        rgr = DecisionTreeRegressor(criterion="squared_error", random_state=128,
                                    max_depth=32, min_samples_leaf=1)
        context_str = "Decision Tree Regressor"
        params = {'random_state': np.arange(1, 100, 5),
                  'max_depth': np.arange(1, 31, 2),
                  'min_samples_leaf': np.arange(1, 10, 2)}
    if s == 'BayR':
        rgr = BayesianRidge()
        context_str = "Bayesian Ridge Regressor"
        params = {'lambda_1': np.arange(1, 100, 5),
                  'n_iter': np.arange(1, 31, 2),
                  'alpha_1': np.arange(1, 10, 2)}
    if s == 'SVR':
        rgr = svm.SVR()
        context_str = "Support Vector Regressor"
        params = {'max_iter': np.arange(1, 100, 5),
                  'C': np.arange(0.1, 1, 0.1)}
    if s == 'AdaR':
        rgr = ensemble.AdaBoostRegressor()
        context_str = "Ensemble Ada Boost Regressor"
        params = {'n_estimators': np.arange(25, 75),
                  'learning_rate': np.arange(0.1, 1, 0.1),
                  'random_state': np.arange(1, 100, 5)}
    if s == 'BagR':
        rgr = ensemble.BaggingRegressor()
        context_str = "Ensemble Bagging Regressor"
        params = {'n_estimators': np.arange(25, 75),
                  'max_samples': np.arange(0.1, 1, 0.1),
                  'random_state': np.arange(1, 100, 5)}
    if s == 'ETR':
        rgr = ensemble.ExtraTreesRegressor()
        context_str = "Ensemble Extra Trees Regressor"
        params = {'n_estimators': np.arange(25, 75),
                  'max_depth': np.arange(1, 31, 2),
                  'min_samples_leaf': np.arange(1, 10, 2),
                  'max_features': np.arange(0.1, 1, 1),
                  'random_state': np.arange(1, 100, 5)}
    if s == 'GBR':
        rgr = ensemble.GradientBoostingRegressor()
        context_str = "Ensemble Gradient Boosting Regressor"
        params = {'n_estimators': np.arange(25, 75),
                  'learning_rate': np.arange(0.1, 1, 0.1),
                  'max_features': np.arange(0.1, 1, 1),
                  'random_state': np.arange(1, 100, 5)}
    if s == 'RFR':
        rgr = ensemble.RandomForestRegressor()
        context_str = "Ensemble Random Forest Regressor"
        params = {'n_estimators': np.arange(25, 75),
                  'max_depth': np.arange(1, 31, 2),
                  'min_samples_leaf': np.arange(1, 10, 2),
                  'max_features': np.arange(0.1, 1, 1),
                  'random_state': np.arange(1, 100, 5)}
    return rgr, context_str, params, context_img
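# --- Usage sketch (not part of the original source): pairing the estimator and
# parameter grid returned by mapping_reg with a randomized search. Assumes numpy
# and the scikit-learn names used inside mapping_reg are imported at module level.
import numpy as np
from sklearn import ensemble, model_selection

rgr, name, params, _ = mapping_reg('BagR')
search = model_selection.RandomizedSearchCV(rgr, params, n_iter=5, cv=3, random_state=0)
rng = np.random.RandomState(0)
X, y = rng.rand(60, 3), rng.rand(60)
search.fit(X, y)
print(name, search.best_params_)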
def predict(train_x: pd.DataFrame, train_y: pd.Series, test_x: pd.DataFrame) -> np.ndarray:
    models = collections.OrderedDict([
        # ("SGD", linear_model.SGDRegressor(max_iter=1000, random_state=0)),
        # ("Lasso", linear_model.Lasso(alpha=1.0, random_state=0)),
        # ("Ridge", linear_model.Ridge(alpha=1.0, random_state=0)),
        # ("Elastic Net", linear_model.ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=0)),
        # ("Linear SVM", svm.LinearSVR(C=0.01, epsilon=2.0)),
        # ("Kernel SVM", svm.SVR(kernel='rbf', C=0.01, gamma=0.1, epsilon=0.1)),
        # ("Nearest neighbor", neighbors.KNeighborsRegressor(n_neighbors=1, weights='distance')),
        # ("K-nearest neighbors", neighbors.KNeighborsRegressor(n_neighbors=5, weights='distance')),
        # ("Decision tree", tree.DecisionTreeRegressor()),
        ("Random forest", ensemble.RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=0)),
        ('bagging', ensemble.BaggingRegressor(tree.DecisionTreeRegressor(random_state=0),
                                              n_estimators=100, n_jobs=-1, random_state=0)),
        ('AdaBoost', ensemble.AdaBoostRegressor(tree.DecisionTreeRegressor(random_state=0),
                                                n_estimators=100, random_state=0)),
        # ('Bagging & AdaBoost', ensemble.AdaBoostRegressor(ensemble.BaggingRegressor(tree.DecisionTreeRegressor(random_state=0), n_estimators=2000, random_state=0), random_state=0)),
        ('GradientBoost', ensemble.GradientBoostingRegressor(n_estimators=1000, learning_rate=0.01, random_state=0)),
        # ('XGBoost', xgb.XGBRegressor(n_estimators=100, random_state=0)),
        # ('XGBoostRF', xgb.XGBRFRegressor(n_estimators=100, random_state=0)),
    ])

    for k, v in models.items():
        scores = model_selection.cross_validate(
            v, train_x, train_y, cv=5,
            scoring=metrics.make_scorer(lambda y_true, y_pred: np.sqrt(
                metrics.mean_squared_error(y_true, y_pred))))
        print(" ", k)
        print(" ", "RMSE = ", scores["test_score"].mean())
        print(" ", "standard deviation = ", scores["test_score"].std())

    model = models["GradientBoost"]
    model.fit(train_x, train_y)
    return model.predict(test_x)
def train_bagging(X_train, y_train, X_test, y_test):
    '''
    Creates a bagging regressor estimator and returns it along with its r2_score
    '''
    clf_bagging = ensemble.BaggingRegressor()
    clf_bagging.fit(X_train, y_train)
    r2_bagging = (metrics.r2_score(y_test, clf_bagging.predict(X_test)),
                  metrics.r2_score(y_train, clf_bagging.predict(X_train)))
    coef_bagging = {
        'estimators': len(clf_bagging.estimators_),
        'estimators_features': len(clf_bagging.estimators_features_)
    }
    return clf_bagging, r2_bagging, coef_bagging
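# --- Usage sketch (not part of the original source): train_bagging returns the
# fitted ensemble, a (test, train) pair of R^2 scores, and ensemble bookkeeping.
# Assumes the module imports ensemble and metrics from scikit-learn.
import numpy as np
from sklearn import ensemble, metrics

rng = np.random.RandomState(0)
X = rng.rand(150, 4)
y = X.sum(axis=1) + 0.05 * rng.randn(150)
clf, (r2_test, r2_train), info = train_bagging(X[:100], y[:100], X[100:], y[100:])
print(r2_test, r2_train, info['estimators'])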
def Call_Bagging_Reg(X_train, y_train, X_test, y_test):
    """ Bagging Regression """
    clf = ensemble.BaggingRegressor()
    clf.fit(X_train, y_train)
    Predicted = clf.predict(X_test)
    print("BaggingRegressor Score = ", clf.score(X_test, y_test))
    MSE = mean_squared_error(y_test, Predicted)
    R2 = r2_score(y_test, Predicted)
    plot_regression(y_test, 'Bagging Reg', Predicted)
    return "BaggingRegressor MSE =", MSE, "BaggingRegressor R2 =", R2
def model(base_estimator=base_xt_reg):
    model_params = {
        "base_estimator": base_estimator,
        "n_estimators": 80,
        "max_samples": 1.0,
        "max_features": 1.0,
        "bootstrap": True,
        "bootstrap_features": False,
        "oob_score": False,
        "n_jobs": -1,
        "random_state": random_state,
        "verbose": 3,
    }
    model = ensemble.BaggingRegressor(**model_params)
    model_name = type(model).__name__
    return model_name, model, model_params
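# --- Usage sketch (hypothetical): base_xt_reg and random_state are module-level
# globals in the original source and must exist before the def above runs; the
# stand-ins here are assumptions so the factory can be exercised in isolation.
from sklearn import ensemble, tree

base_xt_reg = tree.ExtraTreeRegressor()  # stand-in for the module's default base learner
random_state = 42                        # stand-in for the module's shared seed

model_name, bagging_model, params = model()
print(model_name)  # 'BaggingRegressor'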
def train_rf_zeroinflated(x, y, ntrees=50, njobs=12, max_depth=None, max_features=1.0):
    '''Return a trained random-forest-style bagging ensemble of
    zero-inflated decision trees.'''
    if max_features == 'auto':
        max_features = 1.0
    rf = ensemble.BaggingRegressor(
        base_estimator=zeroinflated.DecisionTreeZeroInflatedRegressor(),
        n_estimators=ntrees,
        n_jobs=njobs,
        max_features=max_features,
        oob_score=True)
    rf.fit(x, y)
    return rf
def get_bagging_model(
    base_estimator=get_xtr(),
    n_estimators=80,
    n_jobs=-1,
    verbose=1,
):
    """
    Parameters which we will use in final model training on the DEVELOPMENT set.
    Dict with parameter names (str) as keys and parameter settings as values.

    * base_estimator: object, default=None. The base estimator to fit on random
      subsets of the dataset. If None, then the base estimator is a
      DecisionTreeRegressor.
    * n_estimators: int, default=10. The number of base estimators in the ensemble.
    * max_samples: int or float, default=1.0. The number of samples to draw from X
      to train each base estimator (with replacement by default). Lower ratios
      help avoid over-fitting.
    * max_features: int or float, default=1.0. Like `max_samples`, but for
      features. Lower ratios help avoid over-fitting.
    * bootstrap: bool, default=True. Whether samples are drawn with replacement.
      If False, sampling without replacement is performed.
    * bootstrap_features: bool, default=False. Whether features are drawn with
      replacement.
    * oob_score: bool, default=False. Whether to use out-of-bag samples to
      estimate the generalization error.
    * n_jobs: int, default=None. The number of jobs to run in parallel for both
      fit and predict. None means 1; -1 means using all processors.
    """
    print("\nLoad model...")
    model_params = {
        "base_estimator": base_estimator,
        "n_estimators": n_estimators,
        "max_samples": 1.0,
        "max_features": 1.0,
        "bootstrap": True,
        "bootstrap_features": False,
        "oob_score": False,
        "n_jobs": n_jobs,
        "random_state": rnd_state,
        "verbose": verbose,
    }
    model = ensemble.BaggingRegressor(**model_params)
    model_name = type(model).__name__
    return model_name, model, model_params
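# --- Usage sketch (hypothetical): get_xtr() and rnd_state are defined elsewhere
# in the original module and must exist before the def above is executed (the
# base_estimator default is evaluated at definition time). The stand-ins below
# are assumptions so the factory can be exercised in isolation.
from sklearn import ensemble, tree

def get_xtr():
    return tree.ExtraTreeRegressor()  # stand-in base learner

rnd_state = 0  # stand-in for the module's seed

name, bag_model, bag_params = get_bagging_model(n_estimators=20, verbose=0)
print(name, bag_params["n_estimators"])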
def __init__(self, df, run_prefix):
    y = df.PHENO
    x = df.drop(columns=['PHENO'])
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.3, random_state=42)  # 70:30 split
    ids_train = x_train.ID
    ids_test = x_test.ID
    x_train = x_train.drop(columns=['ID'])
    x_test = x_test.drop(columns=['ID'])

    self._df = df
    self._run_prefix = run_prefix
    self._x_train = x_train
    self._x_test = x_test
    self._y_train = y_train
    self._y_test = y_test
    self._ids_train = ids_train
    self._ids_test = ids_test

    self.log_table = None
    self.best_algorithm = None
    self.algorithm = None
    self.rfe_df = None

    candidate_algorithms = [
        ensemble.AdaBoostRegressor(),
        ensemble.BaggingRegressor(),
        ensemble.GradientBoostingRegressor(),
        ensemble.RandomForestRegressor(n_estimators=10),
        linear_model.LinearRegression(),
        linear_model.SGDRegressor(),
        neighbors.KNeighborsRegressor(),
        neural_network.MLPRegressor(),
        svm.SVR(gamma='auto'),
        xgboost.XGBRegressor()
    ]
    self._algorithms = {algorithm.__class__.__name__: algorithm
                        for algorithm in candidate_algorithms}
    self._best_algorithm_name = None
    self._best_algorithm = None
    self._best_algorithm_metrics = None
def __init__(self, df, run_prefix, max_iter, cv_count):
    self.run_prefix = run_prefix
    self.max_iter = max_iter
    self.cv_count = cv_count

    self.y_tune = df.PHENO
    self.X_tune = df.drop(columns=['PHENO'])
    self.IDs_tune = self.X_tune.ID
    self.X_tune = self.X_tune.drop(columns=['ID'])

    best_algo_name_in = run_prefix + '.best_algorithm.txt'
    best_algo_df = pd.read_csv(best_algo_name_in, header=None, index_col=False)
    self.best_algo = str(best_algo_df.iloc[0, 0])

    self.algorithms = [
        linear_model.LinearRegression(),
        ensemble.RandomForestRegressor(),
        ensemble.AdaBoostRegressor(),
        ensemble.GradientBoostingRegressor(),
        linear_model.SGDRegressor(),
        svm.SVR(),
        neural_network.MLPRegressor(),
        neighbors.KNeighborsRegressor(),
        ensemble.BaggingRegressor(),
        xgboost.XGBRegressor()
    ]

    # Initialize a few variables we will be using later
    self.log_table = None
    self.best_algo_name_in = None
    self.best_algo_df = None
    self.hyperparameters = None
    self.scoring_metric = None
    self.cv_tuned = None
    self.cv_baseline = None
    self.algo = None
    self.searchCVResults = None
    self.rand_search = None
    self.algo_tuned = None
    self.tune_out = None
def findNextTick(df, type):
    df["nextClose"] = df["High"].shift(-1)
    # df["nextTime"] = df["time"].shift(-1)
    df["nextIndex"] = df.index
    df["nextIndex"] = df["nextIndex"].shift(-1)
    df.at[len(df) - 1, 'nextIndex'] = df.iloc[len(df) - 2]["nextIndex"] + 1
    df = df[0:len(df) - 2]
    # df.to_csv("test3.csv")
    X_pred = df[-1:].drop(["nextClose"], axis=1)
    print(X_pred)
    df = df[0:-1]
    X = df.drop(["nextClose"], axis=1)
    # X.to_csv("test4.csv")
    y = df["nextClose"]

    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    r4 = ensemble.AdaBoostRegressor()
    r5 = ensemble.BaggingRegressor(n_jobs=-1)
    r6 = ensemble.GradientBoostingRegressor()
    estimators = [('r1', r1), ('r2', r2), ('r3', r3), ('r4', r4), ('r5', r5), ('r6', r6)]

    if type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)

    regressor.fit(X, y)  # training the algorithm
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0]["High"])
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
def _doFit(self, goodData_LR, goodData_HR, weight, local):
    ''' Private function. Fits the neural network.
    '''
    # Once all the samples have been picked, build the regression using
    # the neural network approach
    print('Fitting neural network')
    HR_scaler = preprocessing.StandardScaler()
    data_HR = HR_scaler.fit_transform(goodData_HR)
    LR_scaler = preprocessing.StandardScaler()
    data_LR = LR_scaler.fit_transform(goodData_LR.reshape(-1, 1))
    if self.regressionType == REG_sknn_ann:
        layers = []
        if 'hidden_layer_sizes' in self.regressorOpt.keys():
            for layer in self.regressorOpt['hidden_layer_sizes']:
                layers.append(
                    ann_sknn.Layer(self.regressorOpt['activation'], units=layer))
        else:
            layers.append(
                ann_sknn.Layer(self.regressorOpt['activation'], units=100))
        self.regressorOpt.pop('activation')
        # Use a default so this does not raise KeyError when
        # 'hidden_layer_sizes' was never set.
        self.regressorOpt.pop('hidden_layer_sizes', None)
        output_layer = ann_sknn.Layer('Linear', units=1)
        layers.append(output_layer)
        baseRegressor = ann_sknn.Regressor(layers, **self.regressorOpt)
    else:
        baseRegressor = ann_sklearn.MLPRegressor(**self.regressorOpt)

    # NN regressors do not support sample weights.
    weight = None

    reg = ensemble.BaggingRegressor(baseRegressor, **self.baggingRegressorOpt)
    if data_HR.shape[0] <= 1:
        reg.max_samples = 1.0
    reg = reg.fit(data_HR, np.ravel(data_LR), sample_weight=weight)

    return {"reg": reg, "HR_scaler": HR_scaler, "LR_scaler": LR_scaler}
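# --- Standalone sketch (not part of the original source) of the same pattern:
# standardize both sides, then bag an MLP. A simplification of the method above,
# not the original class.
import numpy as np
from sklearn import preprocessing, ensemble
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(1)
X_raw = rng.rand(300, 6)
y_raw = X_raw[:, 0] ** 2 + 0.1 * rng.randn(300)

x_scaler = preprocessing.StandardScaler()
y_scaler = preprocessing.StandardScaler()
X = x_scaler.fit_transform(X_raw)
y = y_scaler.fit_transform(y_raw.reshape(-1, 1))

reg = ensemble.BaggingRegressor(MLPRegressor(hidden_layer_sizes=(50,), max_iter=500),
                                n_estimators=5)
reg.fit(X, np.ravel(y))
# Undo the target scaling when predicting
y_pred = y_scaler.inverse_transform(
    reg.predict(x_scaler.transform(X_raw[:3])).reshape(-1, 1))
print(y_pred.ravel())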
def train(num, X_train, y_train, X_test, y_test):
    if num == 1:
        model = tree.DecisionTreeRegressor()
    elif num == 2:
        model = svm.SVR()
    elif num == 3:
        model = LinearRegression()
    elif num == 4:
        model = neighbors.KNeighborsRegressor(n_neighbors=11)
    elif num == 5:
        model = ensemble.RandomForestRegressor(n_estimators=100)
    elif num == 6:
        model = ensemble.AdaBoostRegressor(n_estimators=100)
    elif num == 7:
        model = ensemble.GradientBoostingRegressor(n_estimators=100)
    elif num == 8:
        model = ensemble.BaggingRegressor()
    elif num == 9:
        model = ExtraTreeRegressor()
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    return rmse(np.array(y_test), np.array(pred)), r_squared(np.array(y_test), np.array(pred))
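# --- Usage sketch (hypothetical): rmse and r_squared are helper functions from
# the original module; minimal stand-ins are provided here, as assumptions, so
# train() can be called on synthetic data.
import numpy as np
from sklearn import ensemble, metrics

def rmse(y_true, y_pred):
    return np.sqrt(metrics.mean_squared_error(y_true, y_pred))

def r_squared(y_true, y_pred):
    return metrics.r2_score(y_true, y_pred)

rng = np.random.RandomState(3)
X = rng.rand(200, 5)
y = X @ rng.rand(5)
print(train(8, X[:150], y[:150], X[150:], y[150:]))  # num=8 selects BaggingRegressor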
def main():
    df = pd.read_csv('./Testing_Oceans_data.csv')
    # DataFrame.convert_objects was removed from pandas; coerce to numeric instead.
    df = df.apply(pd.to_numeric, errors='coerce')
    prediction_label = 'Sound_Velocity(m/s)'
    X = np.array(df.drop(columns=[prediction_label]))
    y = np.array(df[prediction_label])
    # train_test_split lives in sklearn.model_selection; the old
    # sklearn.cross_validation module was removed in scikit-learn 0.20.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2)
    evaluations = [
        ('Elastic Net', linear_model.ElasticNet(alpha=0.1), X_train, y_train, X_test, y_test),
        ('Lasso', linear_model.Lasso(alpha=0.1), X_train, y_train, X_test, y_test),
        ('Ridge', linear_model.Ridge(alpha=.1), X_train, y_train, X_test, y_test),
        ('Ensemble Random Forest', ensemble.RandomForestRegressor(), X_train, y_train, X_test, y_test),
        ('Ensemble Extra Trees', ensemble.ExtraTreesRegressor(), X_train, y_train, X_test, y_test),
        ('Ensemble Bagging Regressor', ensemble.BaggingRegressor(), X_train, y_train, X_test, y_test),
        ('Ensemble Gradient Boosting Regressor', ensemble.GradientBoostingRegressor(), X_train, y_train, X_test, y_test),
        ('Ensemble Ada Boost Regressor', ensemble.AdaBoostRegressor(), X_train, y_train, X_test, y_test),
        ('SVR Kernel Linear', svm.SVR(kernel='linear'), X_train, y_train, X_test, y_test),
        ('SVR Kernel RBF', svm.SVR(kernel='rbf'), X_train, y_train, X_test, y_test)
    ]
    for evaluation in evaluations:
        evaluate(*evaluation)
def __init__(self, trainFilename, testFilename, resultsDir):
    # assert len(trainFilenames) == len(testFilenames)
    self.resultsDir = resultsDir
    # ntrees = 1000
    self.trainFilename = trainFilename
    self.testFilename = testFilename
    self.regressors = {
        'lm': MultiOutputRegressor(linear_model.LinearRegression()),
        'rg': MultiOutputRegressor(linear_model.Ridge()),
        'svm': MultiOutputRegressor(svm.SVR(kernel='rbf')),
        'gp': MultiOutputRegressor(gaussian_process.GaussianProcessRegressor()),
        'knn': MultiOutputRegressor(neighbors.KNeighborsRegressor(n_neighbors=5)),
        'dt': MultiOutputRegressor(tree.DecisionTreeRegressor()),
        'br': MultiOutputRegressor(ensemble.BaggingRegressor(n_jobs=-1)),
        'etr': MultiOutputRegressor(ensemble.ExtraTreesRegressor(n_jobs=-1)),
        'rfr': MultiOutputRegressor(ensemble.RandomForestRegressor(n_jobs=-1)),
        'abr': MultiOutputRegressor(ensemble.AdaBoostRegressor()),
        'gbr': MultiOutputRegressor(ensemble.GradientBoostingRegressor()),
        'xgb': MultiOutputRegressor(xgboost.XGBRegressor()),
        'dl': None
    }
    self.load_data()
    self.preprocess_data()
    for key in self.regressors.keys():
        self.fit_model(key)