Example #1
from sklearn import ensemble, model_selection
from sklearn.linear_model import Ridge


def create_model(x_train, y_train, alpha):
    print("begin to train...")
    model = Ridge(alpha=alpha)
    clf1 = ensemble.BaggingRegressor(model, n_jobs=1, n_estimators=900)
    # clf2 = ensemble.AdaBoostRegressor(n_estimators=900, learning_rate=0.01)
    # clf3 = ensemble.RandomForestRegressor(n_estimators=900)
    # clf4 = ensemble.ExtraTreesRegressor(n_estimators=900)
    # print("Bagging")

    # sklearn.cross_validation was removed in scikit-learn 0.20; the same
    # cross_val_score API now lives in sklearn.model_selection
    scores = -model_selection.cross_val_score(
        model, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores1 = -model_selection.cross_val_score(clf1, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores2 = -model_selection.cross_val_score(clf2, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores3 = -model_selection.cross_val_score(clf3, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    # scores4 = -model_selection.cross_val_score(clf4, x_train, y_train, cv=10, scoring='neg_mean_squared_error')
    #
    print('=========================')
    print('Scores:')
    print(scores.mean())
    # print(scores1.mean())
    # print(scores2.mean())
    # print(scores3.mean())
    # print(scores4.mean())
    clf1.fit(x_train, y_train)
    print("Finish")
    return clf1
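A minimal usage sketch for the function above, with purely illustrative data (x_train, y_train, and alpha here are placeholders, not from the original project):

    import numpy as np

    x_train = np.random.rand(200, 5)
    y_train = np.random.rand(200)
    bagged_ridge = create_model(x_train, y_train, alpha=1.0)
    print(bagged_ridge.predict(x_train[:3]))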
Example #2
    def _doFit(self, goodData_LR, goodData_HR, weight, local):
        ''' Private function. Fits the regression tree.
        '''

        # For local regression constrain the number of tree
        # nodes (rules) - section 2.3
        if local:
            self.regressorOpt["max_leaf_nodes"] = 10
        else:
            self.regressorOpt["max_leaf_nodes"] = 30
        self.regressorOpt["min_samples_leaf"] = 10

        # If per leaf linear regression is used then use modified
        # DecisionTreeRegressor. Otherwise use the standard one.
        if self.perLeafLinearRegression:
            baseRegressor = \
                DecisionTreeRegressorWithLinearLeafRegression(self.linearRegressionExtrapolationRatio,
                                                              self.regressorOpt)
        else:
            baseRegressor = \
                tree.DecisionTreeRegressor(**self.regressorOpt)

        reg = ensemble.BaggingRegressor(baseRegressor,
                                        **self.baggingRegressorOpt)
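        # With at most one high-resolution sample, bootstrap subsampling
        # is meaningless, so fall back to drawing the full (single) sample.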
        if goodData_HR.shape[0] <= 1:
            reg.max_samples = 1.0
        reg = reg.fit(goodData_HR, goodData_LR, sample_weight=weight)

        return reg
Example #3
 def __regressionModel(self, model, parameter):
     if model == 'DecisionTree':
         from sklearn import tree
         self.__model = tree.DecisionTreeRegressor()
     elif model == 'LinearRegression':
         from sklearn.linear_model import LinearRegression
         self.__model = LinearRegression()
     elif model == 'SVM':
         from sklearn import svm
         self.__model = svm.SVR(kernel=parameter["kernel"],C=parameter["C"])
     elif model == 'KNeighbors':
         from sklearn import neighbors
         self.__model = neighbors.KNeighborsRegressor(n_neighbors=parameter["n_neighbors"])
     elif model == 'RandomForest':
         from sklearn import ensemble
         self.__model = ensemble.RandomForestRegressor(n_estimators=parameter["n_estimators"])
     elif model == 'AdaBoost':
         from sklearn import ensemble
         self.__model = ensemble.AdaBoostRegressor(n_estimators=parameter["n_estimators"])
     elif model == 'GradientBoosting':
         from sklearn import ensemble
         self.__model = ensemble.GradientBoostingRegressor(n_estimators=parameter["n_estimators"])
     elif model == 'Bagging':
         from sklearn import ensemble
         self.__model = ensemble.BaggingRegressor(n_estimators=parameter["n_estimators"])
     elif model == 'ExtraTree':
         from sklearn.tree import ExtraTreeRegressor
         self.__model = ExtraTreeRegressor()
Example #4
def run(X_train, X_test, y_train, y_test, n_estimators=10, max_samples=10):

    if len(X_train.shape) == 1:
        X_train = np.array([X_train]).T
        X_test = np.array([X_test]).T

    linregress = linear.LinearRegression
    logregress = linear.LogisticRegression
    rng = check_random_state(0)  # random state object from np.random

    print()
    print("BAG")
    print(max_samples, type(max_samples))

    # max_samples = np.float64(10)
    # print(max_samples, type(max_samples))
    # return X_train, y_train, max_samples, n_estimators, None, None, None
    ens = ensemble.BaggingRegressor(base_estimator=linregress(),
                                    random_state=rng,
                                    max_samples=int(max_samples),
                                    n_estimators=int(n_estimators)).fit(
                                        X_train, y_train)

    y_predicted = ens.predict(X_test)

    # Validation
    rmse = err.RMSE(y_predicted, y_test)
    r2 = err.Rsquare(y_predicted, y_test)

    return y_predicted, rmse, r2, None  # last value is the feature-importance index, which this model does not provide
Example #5
    def __init__(self, data=None, data_to_predict=None, target=None):
        """Reads in data and initializes some attributes for later

        Args:
            data: preloaded dataframe, default is None
            data_to_predict: dataframe to predict on later, default is None
            target: name of the target column, default is None
        """
        self.data = data
        self.target_name = target
        self.model_dict = {
            'LinearRegression': lm.LinearRegression(),
            'Lasso': lm.Lasso(),
            'Ridge': lm.Ridge(),
            'RandomForestRegressor': en.RandomForestRegressor(),
            'AdaBoostRegressor': en.AdaBoostRegressor(),
            'GradientBoost': en.GradientBoostingRegressor(),
            'BaggingRegressor': en.BaggingRegressor(),
            'RandomForestClassifier': en.RandomForestClassifier()
        }
        self.features_ = []
        self.selected_features_ = []
        self.model = None
        self.cv_score_ = {}
        self.train_index = None
        self.test_index = None
        self.data_to_predict = data_to_predict
        self.predictions = None
        self.train_score_ = None
        self.test_score_ = None
        self.best_params_ = None
Example #6
    def Bagging_regression(self, train_attr, train_label):
        model = ensemble.BaggingRegressor(base_estimator=self.base_estimator,
                                          n_estimators=self.n_estimators,
                                          max_samples=self.max_samples,
                                          max_features=self.max_features)
        model.fit(train_attr, train_label)

        return model
Example #7
def train_model(train, val, y_train, y_val, train_type, model_type,
                randomstate):
    if train_type == 'bagging':
        # model  = bagging_models.fit(train, val, y_train,  y_val, model_type,  randomstate)
        base_model = DecisionTreeRegressor(random_state=randomstate)
        model = ensemble.BaggingRegressor(max_samples=0.9,
                                          max_features=1,
                                          warm_start=True,
                                          base_estimator=base_model,
                                          random_state=randomstate,
                                          n_estimators=100,
                                          n_jobs=50)
        # model = ensemble.BaggingRegressor(max_samples = 0.85, max_features = 1, warm_start = True, base_estimator = base_model, random_state = randomstate, n_estimators = 100, n_jobs = 50)
        model.fit(train, y_train)
        return model
    else:
        if model_type == 'Linear':
            model = linear_model.LinearRegression(n_jobs=3)
        elif model_type == 'SVR':
            model = svm.SVR(C=3.0,
                            cache_size=50,
                            degree=3,
                            gamma='auto',
                            kernel='rbf',
                            max_iter=-1,
                            shrinking=True,
                            tol=0.001,
                            verbose=False)
            # model = svm.SVR(C=3.0, cache_size=50, degree=2, gamma='auto', kernel='sigmoid', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
        elif model_type == 'DT':
            model = DecisionTreeRegressor(random_state=randomstate)
        elif model_type == 'MLP':
            model = MLPRegressor(hidden_layer_sizes=4,
                                 activation='relu',
                                 random_state=randomstate)
        elif model_type == 'poly':
            poly = preprocessing.PolynomialFeatures(degree=2,
                                                    include_bias=False,
                                                    interaction_only=False)
            # fit the polynomial expansion on the training split only,
            # then apply the same mapping to the validation split
            train = poly.fit_transform(train)
            val = poly.transform(val)
            model = linear_model.LinearRegression(n_jobs=3)
        elif model_type == 'LinearGAM':
            model = LinearGAM()
        elif model_type == 'GammaGAM':
            model = GammaGAM()
        elif model_type == 'InvGaussGAM':
            model = InvGaussGAM()
        elif model_type == 'LogisticGAM':
            model = LogisticGAM()
        elif model_type == 'PoissonGAM':
            model = PoissonGAM()
        elif model_type == 'ExpectileGAM':
            model = ExpectileGAM()

        model.fit(train, y_train)
        return model
Example #8
def optimize_BaggingSVR(X_train, y_train):
    opt = modelSel.RandomizedSearchCV(estimator=skEn.BaggingRegressor(
        base_estimator=sk.SVR(cache_size=500, gamma='auto')),
                                      param_distributions=param_baggingSVR,
                                      cv=5,
                                      scoring=scoreFunction)
    opt.fit(X_train, y_train)

    return formatOptimal(opt.best_params_)
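This snippet depends on module-level param_baggingSVR and scoreFunction objects that are not shown. A hypothetical search space of the expected shape (values are illustrative; base_estimator__C uses scikit-learn's nested-parameter syntax to reach into the wrapped SVR):

    param_baggingSVR = {
        'n_estimators': [10, 50, 100],
        'max_samples': [0.5, 0.75, 1.0],
        'base_estimator__C': [0.1, 1.0, 10.0],
    }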
Example #9
def bagging_train(x_train, y_train, x_test):
    # bagging
    base_model = linear_model.LassoCV(alphas=None, cv=5)
    bagging = ensemble.BaggingRegressor(base_estimator=base_model,
                                        n_estimators=10)
    bagging.fit(x_train, y_train)
    # prediction results
    pred = bagging.predict(x_test)
    return pred
Example #10
def optimize_BaggingSVR(X_train, y_train):
    opt = modelSel.GridSearchCV(
        skEn.BaggingRegressor(base_estimator=sk.SVR(cache_size=500)),
        param_baggingSVR,
        cv=5,
        scoring=scoreFunction)
    opt.fit(X_train, y_train)

    return formatOptimal(opt.best_params_)
Example #11
 def __init__(self,
              type="linear_regression",
              regularization=False,
              n_estimators=100,
              subsample=1.0,
              max_depth=3,
              c=80,
              e=0.001):
     if type == "linear_regression":
         self.model = linear_model.LinearRegression(normalize=True)
     elif type == "ridge":
         self.model = linear_model.Ridge()
     elif type == "SVM":
         self.model = svm.SVR(kernel='rbf', gamma='auto', C=c, epsilon=e)
     elif type == 'XGBoost':
         # despite the label, this is scikit-learn's GradientBoostingRegressor, not xgboost
         self.model = ensemble.GradientBoostingRegressor(
             n_estimators=n_estimators,
             subsample=subsample,
             max_depth=max_depth)
     elif type == 'BaggingRegressor':
         self.model = ensemble.BaggingRegressor()
     elif type == 'RandomForest':
         self.model = ensemble.RandomForestRegressor(
             n_estimators=n_estimators, max_depth=max_depth)
     elif type == "AdaBoostRegressor":
         self.model = ensemble.AdaBoostRegressor(n_estimators=n_estimators)
     elif type == 'ExtraTreesRegressor':
         self.model = ensemble.ExtraTreesRegressor(
             n_estimators=n_estimators, max_depth=max_depth)
     elif type == 'Lasso':
         self.model = linear_model.Lasso()
     elif type == "qda":
         self.model = discriminant_analysis.QuadraticDiscriminantAnalysis()
     elif type == "lda":
         self.model = discriminant_analysis.LinearDiscriminantAnalysis()
     elif type == 'XGBoost with Bagging':
         self.model = ensemble.BaggingRegressor(
             base_estimator=ensemble.GradientBoostingRegressor(
                 n_estimators=100, subsample=1.0, max_depth=3),
             n_estimators=n_estimators)
     elif type == "Gaussian Process":
         self.model = gaussian_process.GaussianProcessRegressor()
Example #12
def train(XTrain, yTrain, XPredict):
    # randint is presumably scipy.stats.randint, giving a discrete search distribution
    params = {'n_estimators': randint(1, 100)}
    kfold = model_selection.KFold(n_splits=3)
    svr = svm.SVR(kernel='rbf', C=50, gamma=0.1)
    baggingsvr = ensemble.BaggingRegressor(svr)
    # cross_validation and grid_search were removed in scikit-learn 0.20; the
    # same classes live in model_selection, and the MSE scorer is now spelled
    # 'neg_mean_squared_error'
    clf = model_selection.RandomizedSearchCV(baggingsvr, param_distributions=params,
                                             n_iter=10, scoring='neg_mean_squared_error',
                                             cv=kfold, n_jobs=-1)
    clf.fit(XTrain, yTrain)  # train the model in a single call
    # print(clf.best_score_, clf.best_estimator_)
    yPredict = clf.predict(XPredict)
    return yPredict, clf.best_params_
Example #13
def declareLO():
    ridGe = Ridge()
    svR = svm.SVR(C=5, gamma=0.001)
    adaBoost = ensemble.AdaBoostRegressor()
    bagging = ensemble.BaggingRegressor()
    extraTree = ensemble.ExtraTreesRegressor()
    gradientBoost = ensemble.GradientBoostingRegressor()
    randForest = ensemble.RandomForestRegressor()
    learningObjs = [
        svR, ridGe, adaBoost, bagging, extraTree, gradientBoost, randForest
    ]
    return learningObjs
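A hedged sketch of looping over the returned learners (X and y are hypothetical arrays):

    for lo in declareLO():
        lo.fit(X, y)
        print(type(lo).__name__, lo.score(X, y))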
Example #14
 def __init__(self):
     self.model_dict = {
         "SGDRegressor": linear_model.SGDRegressor(max_iter=1000),
         "HuberRegressor": linear_model.HuberRegressor(),
         "LinearRegression": linear_model.LinearRegression(),
         "LinearSVR": svm.LinearSVR(),
         "BaggingRegressor": ensemble.BaggingRegressor(),
         "AdaBoostRegressor": ensemble.AdaBoostRegressor(),
         "ExtraTreesRegressor": ensemble.ExtraTreesRegressor(),
         "RandomForestRegressor": ensemble.RandomForestRegressor(),
         "GradientBoostingRegressor": ensemble.GradientBoostingRegressor()
     }
Example #15
def setup_BaggingSVR(learner_settings):
    # default values
    base_estimator = setup_SVR(learner_settings)
    n_estimators = 10
    max_samples = 1.0
    max_features = 1.0
    bootstrap = True
    bootstrap_features = False
    oob_score = False
    warm_start = False
    n_jobs = 1
    random_state = None
    verbose = 0

    # change default values
    for additional_setting in learner_settings:
        # split identifier=value, so you can identify value and the variable
        setting_value_pair = additional_setting.split("=")
        if setting_value_pair[0] == "verbose":
            if setting_value_pair[1].isnumeric():
                verbose = int(setting_value_pair[1])
        if setting_value_pair[0] == "random_state":
            random_state = int(setting_value_pair[1])
        if setting_value_pair[0] == "n_jobs":
            n_jobs = int(setting_value_pair[1])
        if setting_value_pair[0] == "warm_start":
            warm_start = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "oob_score":
            oob_score = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "bootstrap_features":
            bootstrap_features = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "bootstrap":
            bootstrap = (setting_value_pair[1] == "True")
        if setting_value_pair[0] == "max_features":
            max_features = parse_to_int_float_bool_string(
                setting_value_pair[1])
        if setting_value_pair[0] == "max_samples":
            max_samples = parse_to_int_float_bool_string(setting_value_pair[1])
        if setting_value_pair[0] == "n_estimators":
            n_estimators = int(setting_value_pair[1])

    return skEn.BaggingRegressor(base_estimator=base_estimator,
                                 n_estimators=n_estimators,
                                 max_samples=max_samples,
                                 max_features=max_features,
                                 bootstrap=bootstrap,
                                 bootstrap_features=bootstrap_features,
                                 oob_score=oob_score,
                                 warm_start=warm_start,
                                 n_jobs=n_jobs,
                                 random_state=random_state,
                                 verbose=verbose)
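A hypothetical call, assuming setup_SVR accepts the same key=value settings list:

    reg = setup_BaggingSVR(["n_estimators=50", "max_samples=0.8", "n_jobs=2"])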
Example #16
def Call_Bagging_Reg(X_train, y_train, X_test, y_test):

    clf = ensemble.BaggingRegressor()
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    print("Bagging Regressor Score: ", clf.score(X_test, y_test))

    R2 = r2_score(y_test, prediction)

    # plot Prediction
    plot_regression_predictions(y_test, 'Bagging Regressor', prediction)

    return R2
Example #17
def mapping_reg(s):
	rgr=None
	context_str=""
	context_img=""
	params={}
	if s == 'LR':			
		rgr=LinearRegression()
		context_str="Linear Regressor"
		params={'n_jobs': np.arange(1,11,1)}

	if s == 'DT':	
		rgr=DecisionTreeRegressor(criterion="mse", random_state=128, max_depth=32, min_samples_leaf=1)
		context_str="Decision Tree Regressor"
		params={'random_state':np.arange(1,100,5), 'max_depth': np.arange(1,31,2), 'min_samples_leaf': np.arange(1,10,2)}

	if s == 'BayR':
		rgr=BayesianRidge()
		context_str="Bayesian Ridge Regressor"
		params={'lambda_1':np.arange(1,100,5), 'n_iter': np.arange(1,31,2), 'alpha_1': np.arange(1,10,2)}

	if s == 'SVR':
		rgr=svm.SVR()
		context_str="Support Vector Regressor"
		params={'max_iter':np.arange(1,100,5), 'C':np.arange(0.1, 1, 0.1)}
	
	if s == 'AdaR':
		rgr=ensemble.AdaBoostRegressor()
		context_str="Ensemble Ada Boost Regressor"
		params={'n_estimators':np.arange(25,75), 'learning_rate':np.arange(0.1, 1, 0.1), 'random_state':np.arange(1, 100, 5)}

	if s == 'BagR':
		rgr=ensemble.BaggingRegressor()
		context_str="Ensemble Bagging Regressor"
		params={'n_estimators':np.arange(25,75), 'max_samples':np.arange(0.1, 1, 0.1), 'random_state':np.arange(1,100,5)}

	if s == 'ETR':
		rgr=ensemble.ExtraTreesRegressor()
		context_str="Ensemble Extra Trees Regressor"
		params={'n_estimators':np.arange(25,75), 'max_depth': np.arange(1,31,2), 'min_samples_leaf': np.arange(1,10,2), 'max_features':np.arange(0.1, 1, 0.1), 'random_state':np.arange(1,100,5)}

	if s == 'GBR':
		rgr=ensemble.GradientBoostingRegressor()
		context_str="Ensemble Gradient Boosting Regressor"					
		params={'n_estimators':np.arange(25,75), 'learning_rate':np.arange(0.1, 1, 0.1), 'max_features':np.arange(0.1, 1, 0.1), 'random_state':np.arange(1,100,5)}
							
	if s == 'RFR':
		rgr=ensemble.RandomForestRegressor()
		context_str="Ensemble Random Forest Regressor"
		params={'n_estimators':np.arange(25,75), 'max_depth': np.arange(1,31,2), 'min_samples_leaf': np.arange(1,10,2), 'max_features':np.arange(0.1, 1, 0.1), 'random_state':np.arange(1,100,5)}

	return rgr, context_str, params, context_img
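The returned params dict is shaped for scikit-learn's search utilities; a hedged usage sketch (X and y are hypothetical):

    from sklearn.model_selection import RandomizedSearchCV

    rgr, name, params, _ = mapping_reg('BagR')
    search = RandomizedSearchCV(rgr, params, n_iter=10, cv=3)
    search.fit(X, y)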
Example #18
def predict(train_x: pd.DataFrame, train_y: pd.Series,
            test_x: pd.DataFrame) -> np.ndarray:
    models = collections.OrderedDict([
        # ("SGD", linear_model.SGDRegressor(max_iter=1000, random_state=0)),
        # ("Lasso", linear_model.Lasso(alpha=1.0, random_state=0)),
        # ("Ridge", linear_model.Ridge(alpha=1.0, random_state=0)),
        # ("Elastic Net", linear_model.ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=0)),
        # ("線形SVM", svm.LinearSVR(C=0.01, epsilon=2.0)),
        # ("カーネルSVM", svm.SVR(kernel='rbf', C=0.01, gamma=0.1, epsilon=0.1)),
        # ("最近傍法", neighbors.KNeighborsRegressor(n_neighbors=1, weights='distance')),
        # ("K近傍法", neighbors.KNeighborsRegressor(n_neighbors=5, weights='distance')),
        # ("決定木", tree.DecisionTreeRegressor()),
        ("ランダムフォレスト",
         ensemble.RandomForestRegressor(n_estimators=100,
                                        n_jobs=-1,
                                        random_state=0)),
        ('bagging',
         ensemble.BaggingRegressor(tree.DecisionTreeRegressor(random_state=0),
                                   n_estimators=100,
                                   n_jobs=-1,
                                   random_state=0)),
        ('AdaBoost',
         ensemble.AdaBoostRegressor(tree.DecisionTreeRegressor(random_state=0),
                                    n_estimators=100,
                                    random_state=0)),
        # ('Bagging & AdaBoost', ensemble.AdaBoostRegressor(ensemble.BaggingRegressor(tree.DecisionTreeRegressor(random_state=0), n_estimators=2000, random_state=0), random_state=0)),
        ('GradientBoost',
         ensemble.GradientBoostingRegressor(n_estimators=1000,
                                            learning_rate=0.01,
                                            random_state=0)),
        # ('XGBoost', xgb.XGBRegressor(n_estimators=100, random_state=0)),
        # ('XGBoostRF', xgb.XGBRFRegressor(n_estimators=100, random_state=0)),
    ])

    for k, v in models.items():
        scores = model_selection.cross_validate(
            v,
            train_x,
            train_y,
            cv=5,
            scoring=metrics.make_scorer(lambda y_true, y_pred: np.sqrt(
                metrics.mean_squared_error(y_true, y_pred))))
        print("  ", k)
        print("    ", "RMSE     = ", scores["test_score"].mean())
        print("    ", "標準偏差 = ", scores["test_score"].std())

    model = models["GradientBoost"]
    model.fit(train_x, train_y)
    return model.predict(test_x)
Example #19
def train_bagging(X_train, y_train, X_test, y_test):
    '''
    Creates a bagging regressor estimator and returns it along with its
    R² scores on the test and train sets.
    '''
    clf_bagging = ensemble.BaggingRegressor()
    clf_bagging.fit(X_train, y_train)
    r2_bagging = metrics.r2_score(
        y_test, clf_bagging.predict(X_test)), metrics.r2_score(
            y_train, clf_bagging.predict(X_train))
    coef_bagging = {
        'estimators': len(clf_bagging.estimators_),
        'estimators_features': len(clf_bagging.estimators_features_)
    }
    return clf_bagging, r2_bagging, coef_bagging
Example #20
def Call_Bagging_Reg(X_train, y_train, X_test, y_test):
    """
    Bagging Regression
    """

    clf = ensemble.BaggingRegressor()

    clf.fit(X_train, y_train)
    Predicted = clf.predict(X_test)
    print("BaggingRegressor Score = ", clf.score(X_test, y_test))
    MSE = mean_squared_error(y_test, Predicted)
    R2 = r2_score(y_test, Predicted)
    plot_regression(y_test, 'Bagging Reg', Predicted)

    return "BaggingRegressor MSE =", MSE, "BaggingRegressor R2 =", R2
Example #21
def model(base_estimator=base_xt_reg):
    model_params = {
        "base_estimator": base_estimator,
        "n_estimators": 80,
        "max_samples": 1.0,
        "max_features": 1.0,
        "bootstrap": True,
        "bootstrap_features": False,
        "oob_score": False,
        "n_jobs": -1,
        "random_state": random_state,
        "verbose": 3,
    }
    model = ensemble.BaggingRegressor(**model_params)
    model_name = type(model).__name__
    return model_name, model, model_params
Example #22
def train_rf_zeroinflated(x,
                          y,
                          ntrees=50,
                          njobs=12,
                          max_depth=None,
                          max_features=1.0):
    ''' Return a trained random-forest-style ensemble (bagged zero-inflated decision trees) '''
    if max_features == 'auto': max_features = 1.0
    rf = ensemble.BaggingRegressor(
        base_estimator=zeroinflated.DecisionTreeZeroInflatedRegressor(),
        n_estimators=ntrees,
        n_jobs=njobs,
        max_features=max_features,
        oob_score=True)
    rf.fit(x, y)
    return rf
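Because oob_score=True, the fitted ensemble exposes an out-of-bag R² estimate, e.g. (x and y hypothetical):

    rf = train_rf_zeroinflated(x, y)
    print(rf.oob_score_)  # out-of-bag R² estimate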
Example #23
def get_bagging_model(
    base_estimator=get_xtr(),
    n_estimators=80,
    n_jobs=-1,
    verbose=1,
):
    """
    Parameters which we will use in final model training on the DEVELOPMENT set.
    Dict with parameter names (str) as keys and parameter settings as values.

        * base_estimator: object, default=None.
            The base estimator to fit on random subsets of the dataset.
            If None, then the base estimator is a DecisionTreeRegressor.
        * n_estimators: int, default=10.
            The number of base estimators in the ensemble.
        * max_samples: int or float, default=1.0.
            The number of samples to draw from X to train each base estimator
            (with replacement by default). Lower ratios avoid over-fitting.
        * max_features: int or float, default=1.0.
            Like `max_samples`, but refers to features. Lower ratios avoid over-fitting.
        * bootstrap: bool, default=True.
            Whether samples are drawn with replacement. If False, sampling without
            replacement is performed.
        * bootstrap_features: bool, default=False.
            Whether features are drawn with replacement.
        * oob_score: bool, default=False.
            Whether to use out-of-bag samples to estimate the generalization error.
        * n_jobs: int, default=None.
            The number of jobs to run in parallel for both fit and predict.
            None means 1. -1 means using all processors.
    """
    print(f"\nLoad model...")
    model_params = {
        "base_estimator": base_estimator,
        "n_estimators": n_estimators,
        "max_samples": 1.0,
        "max_features": 1.0,
        "bootstrap": True,
        "bootstrap_features": False,
        "oob_score": False,
        "n_jobs": n_jobs,
        "random_state": rnd_state,
        "verbose": verbose,
    }
    model = ensemble.BaggingRegressor(**model_params)
    model_name = type(model).__name__
    return model_name, model, model_params
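A hedged usage sketch (get_xtr and rnd_state come from elsewhere in the source module; the data is hypothetical):

    model_name, model, model_params = get_bagging_model(n_estimators=40)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)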
Example #24
    def __init__(self, df, run_prefix):
        y = df.PHENO
        x = df.drop(columns=['PHENO'])

        x_train, x_test, y_train, y_test = model_selection.train_test_split(
            x, y, test_size=0.3, random_state=42)  # 70:30
        ids_train = x_train.ID
        ids_test = x_test.ID
        x_train = x_train.drop(columns=['ID'])
        x_test = x_test.drop(columns=['ID'])

        self._df = df
        self._run_prefix = run_prefix
        self._x_train = x_train
        self._x_test = x_test
        self._y_train = y_train
        self._y_test = y_test
        self._ids_train = ids_train
        self._ids_test = ids_test

        self.log_table = None
        self.best_algorithm = None
        self.algorithm = None
        self.rfe_df = None

        candidate_algorithms = [
            ensemble.AdaBoostRegressor(),
            ensemble.BaggingRegressor(),
            ensemble.GradientBoostingRegressor(),
            ensemble.RandomForestRegressor(n_estimators=10),
            linear_model.LinearRegression(),
            linear_model.SGDRegressor(),
            neighbors.KNeighborsRegressor(),
            neural_network.MLPRegressor(),
            svm.SVR(gamma='auto'),
            xgboost.XGBRegressor()
        ]

        self._algorithms = {
            algorithm.__class__.__name__: algorithm
            for algorithm in candidate_algorithms
        }
        self._best_algorithm_name = None
        self._best_algorithm = None
        self._best_algorithm_metrics = None
Example #25
    def __init__(self, df, run_prefix, max_iter, cv_count):
        self.run_prefix = run_prefix
        self.max_iter = max_iter
        self.cv_count = cv_count
       
        self.y_tune = df.PHENO
        self.X_tune = df.drop(columns=['PHENO'])
        self.IDs_tune = self.X_tune.ID
        self.X_tune = self.X_tune.drop(columns=['ID'])

        best_algo_name_in = run_prefix + '.best_algorithm.txt'
        best_algo_df = pd.read_csv(best_algo_name_in, header=None, index_col=False)
        self.best_algo = str(best_algo_df.iloc[0,0])

        self.algorithms = [
            linear_model.LinearRegression(),
            ensemble.RandomForestRegressor(),
            ensemble.AdaBoostRegressor(),
            ensemble.GradientBoostingRegressor(),
            linear_model.SGDRegressor(),
            svm.SVR(),
            neural_network.MLPRegressor(),
            neighbors.KNeighborsRegressor(),
            ensemble.BaggingRegressor(),
            xgboost.XGBRegressor()
        ]

        # Initialize a few variables we will be using later 
        self.log_table = None
        self.best_algo_name_in = None
        self.best_algo_df = None
        self.hyperparameters = None
        self.scoring_metric = None
        self.cv_tuned = None 
        self.cv_baseline = None 
        self.algo = None
        self.searchCVResults = None
        self.rand_search = None
        self.algo_tuned = None
        self.tune_out = None
Example #26
def findNextTick(df, type):
    df["nextClose"] = df["High"].shift(-1)
    #df["nextTime"] = df["time"].shift(-1)
    df["nextIndex"] = df.index
    df["nextIndex"] = df["nextIndex"].shift(-1)
    df.at[len(df) - 1, 'nextIndex'] = df.iloc[len(df) - 2]["nextIndex"] + 1
    df = df[0:len(df) - 2]
    #df.to_csv("test3.csv")
    X_pred = df[-1:].drop(["nextClose"], axis=1)
    print(X_pred)
    df = df[0:-1]
    X = df.drop(["nextClose"], axis=1)
    #X.to_csv("test4.csv")
    y = df["nextClose"]
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    r4 = ensemble.AdaBoostRegressor()
    r5 = ensemble.BaggingRegressor(n_jobs=-1)
    r6 = ensemble.GradientBoostingRegressor()
    estimators = [('r1', r1), ('r2', r2), ('r3', r3), ('r4', r4), ('r5', r5),
                  ('r6', r6)]
    if (type == 0):
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif (type == 1):
        regressor = ensemble.VotingRegressor(estimators=estimators)
    regressor.fit(X, y)  # training the algorithm
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0]["High"])
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
Example #27
    def _doFit(self, goodData_LR, goodData_HR, weight, local):
        ''' Private function. Fits the neural network.
        '''

        # Once all the samples have been picked build the regression using
        # neural network approach
        print('Fitting neural network')
        HR_scaler = preprocessing.StandardScaler()
        data_HR = HR_scaler.fit_transform(goodData_HR)
        LR_scaler = preprocessing.StandardScaler()
        data_LR = LR_scaler.fit_transform(goodData_LR.reshape(-1, 1))
        if self.regressionType == REG_sknn_ann:
            layers = []
            if 'hidden_layer_sizes' in self.regressorOpt.keys():
                for layer in self.regressorOpt['hidden_layer_sizes']:
                    layers.append(
                        ann_sknn.Layer(self.regressorOpt['activation'],
                                       units=layer))
            else:
                layers.append(
                    ann_sknn.Layer(self.regressorOpt['activation'], units=100))
            self.regressorOpt.pop('activation')
            self.regressorOpt.pop('hidden_layer_sizes')
            output_layer = ann_sknn.Layer('Linear', units=1)
            layers.append(output_layer)
            baseRegressor = ann_sknn.Regressor(layers, **self.regressorOpt)
        else:
            baseRegressor = ann_sklearn.MLPRegressor(**self.regressorOpt)

        # NN regressors do not support sample weights.
        weight = None

        reg = ensemble.BaggingRegressor(baseRegressor,
                                        **self.baggingRegressorOpt)
        if data_HR.shape[0] <= 1:
            reg.max_samples = 1.0
        reg = reg.fit(data_HR, np.ravel(data_LR), sample_weight=weight)

        return {"reg": reg, "HR_scaler": HR_scaler, "LR_scaler": LR_scaler}
Example #28
 def train(num, X_train, y_train, X_test, y_test):
     if num == 1:
         model = tree.DecisionTreeRegressor()
     elif num == 2:
         model = svm.SVR()
     elif num == 3:
         model = LinearRegression()
     elif num == 4:
         model = neighbors.KNeighborsRegressor(n_neighbors=11)
     elif num == 5:
         model = ensemble.RandomForestRegressor(n_estimators=100)
     elif num == 6:
         model = ensemble.AdaBoostRegressor(n_estimators=100)
     elif num == 7:
         model = ensemble.GradientBoostingRegressor(n_estimators=100)
     elif num == 8:
         model = ensemble.BaggingRegressor()
     elif num == 9:
         model = ExtraTreeRegressor()
     model.fit(X_train, y_train)
     pred = model.predict(X_test)
     return rmse(np.array(y_test), np.array(pred)), r_squared(np.array(y_test), np.array(pred))
Example #29
def main():
    df = pd.read_csv('./Testing_Oceans_data.csv')
    # DataFrame.convert_objects was removed from pandas; coerce to numeric instead
    df = df.apply(pd.to_numeric, errors='coerce')

    prediction_label = 'Sound_Velocity(m/s)'

    X = np.array(df.drop(columns=[prediction_label]))
    y = np.array(df[prediction_label])

    # sklearn.cross_validation was removed in 0.20; use model_selection instead
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2)

    evaluations = [
        ('Elastic Net', linear_model.ElasticNet(alpha=0.1), X_train, y_train,
         X_test, y_test),
        ('Lasso', linear_model.Lasso(alpha=0.1), X_train, y_train, X_test,
         y_test),
        ('Ridge', linear_model.Ridge(alpha=.1), X_train, y_train, X_test,
         y_test),
        ('Ensemble Random Forest', ensemble.RandomForestRegressor(), X_train,
         y_train, X_test, y_test),
        ('Ensemble Extra Trees', ensemble.ExtraTreesRegressor(), X_train,
         y_train, X_test, y_test),
        ('Ensemble Bagging Regressor', ensemble.BaggingRegressor(), X_train,
         y_train, X_test, y_test),
        ('Ensemble Gradient Boosting Regressor',
         ensemble.GradientBoostingRegressor(), X_train, y_train, X_test,
         y_test),
        ('Ensemble Ada Boost Regressor', ensemble.AdaBoostRegressor(), X_train,
         y_train, X_test, y_test),
        ('SVR Kernel Linear', svm.SVR(kernel='linear'), X_train, y_train,
         X_test, y_test),
        ('SVR Kernel RBF', svm.SVR(kernel='rbf'), X_train, y_train, X_test,
         y_test)
    ]

    for evaluation in evaluations:
        evaluate(*evaluation)
Example #30
 def __init__(self, trainFilename, testFilename, resultsDir):
     # assert len(trainFilenames) == len(testFilenames)
     self.resultsDir = resultsDir
     #ntrees = 1000
     self.trainFilename = trainFilename
     self.testFilename = testFilename
     self.regressors = {
         'lm':
         MultiOutputRegressor(linear_model.LinearRegression()),
         'rg':
         MultiOutputRegressor(linear_model.Ridge()),
         'svm':
         MultiOutputRegressor(svm.SVR(kernel='rbf')),
         'gp':
         MultiOutputRegressor(gaussian_process.GaussianProcessRegressor()),
         'knn':
         MultiOutputRegressor(neighbors.KNeighborsRegressor(n_neighbors=5)),
         'dt':
         MultiOutputRegressor(tree.DecisionTreeRegressor()),
         'br':
         MultiOutputRegressor(ensemble.BaggingRegressor(n_jobs=-1)),
         'etr':
         MultiOutputRegressor(ensemble.ExtraTreesRegressor(n_jobs=-1)),
         'rfr':
         MultiOutputRegressor(ensemble.RandomForestRegressor(n_jobs=-1)),
         'abr':
         MultiOutputRegressor(ensemble.AdaBoostRegressor()),
         'gbr':
         MultiOutputRegressor(ensemble.GradientBoostingRegressor()),
         'xgb':
         MultiOutputRegressor(xgboost.XGBRegressor()),
         'dl':
         None
     }
     self.load_data()
     self.preprocess_data()
     for key in self.regressors.keys():
         self.fit_model(key)