Example #1
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)], weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]), axis=0,
                     weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)], weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
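
For reference, a minimal self-contained sketch of the identity this test checks: VotingRegressor.predict equals np.average of the members' predictions under the same weights (toy data and names of my own choosing):

import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import VotingRegressor

X = np.arange(10).reshape(-1, 1)
y = np.array([0., 0., 0., 0., 0., 1., 2., 3., 4., 100.])

# Fit the members standalone and inside the ensemble with weights 1 and 3
mean_reg = DummyRegressor(strategy='mean').fit(X, y)
median_reg = DummyRegressor(strategy='median').fit(X, y)
ereg = VotingRegressor([('mean', DummyRegressor(strategy='mean')),
                        ('median', DummyRegressor(strategy='median'))],
                       weights=[1, 3]).fit(X, y)

# predict() is the weighted mean of the member predictions
manual = np.average([mean_reg.predict(X), median_reg.predict(X)],
                    axis=0, weights=[1, 3])
assert np.allclose(ereg.predict(X), manual)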
Example #2
def plot_voting_regressor():
    X, y = load_diabetes(return_X_y=True)

    # Train regressors
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()

    reg1.fit(X, y)
    reg2.fit(X, y)
    reg3.fit(X, y)

    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)

    """ Making predictions """
    xt = X[:20]

    pred1 = reg1.predict(xt)
    pred2 = reg2.predict(xt)
    pred3 = reg3.predict(xt)
    pred4 = ereg.predict(xt)

    """ Plot the results """
    plt.figure()
    plt.plot(pred1, 'gd', label='GradientBoostingRegressor')
    plt.plot(pred2, 'b^', label='RandomForestRegressor')
    plt.plot(pred3, 'ys', label='LinearRegression')
    plt.plot(pred4, 'r*', ms=10, label='VotingRegressor')

    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.ylabel('predicted')
    plt.xlabel('training samples')
    plt.legend(loc="best")
    plt.title('Regressor predictions and their average')

    plt.show()
Example #3
def train(features: List[str], target_col: str, train_: pd.DataFrame,
          valid_: pd.DataFrame):
    # target_col = "burn_area"
    # date_split = "2013-01-01"
    # train_all = get_training_dataset()
    # train_ = train_all.loc[train_all.date < date_split]
    # valid_ = train_all.loc[train_all.date > date_split]

    X_train, y_train = train_[features], train_[target_col]
    X_valid, y_valid = valid_[features], valid_[target_col]

    xgb_model = xgb.XGBRegressor(
        n_estimators=300,
        max_depth=3,
        colsample_bytree=0.5,
        objective="reg:squarederror",
    )

    xgb_model.fit(X_train, y_train)

    cat_model = CatBoostRegressor(iterations=300,
                                  depth=5,
                                  learning_rate=0.1,
                                  loss_function="RMSE")
    cat_model.fit(X_train, y_train, eval_set=(X_valid, y_valid), plot=True)

    lgb_model = lgb.LGBMRegressor(n_estimators=100,
                                  max_depth=8,
                                  num_leaves=6,
                                  objective="regression")
    lgb_model.fit(X_train, y_train)

    voting_regressor = VotingRegressor([("xgb", xgb_model), ("cat", cat_model),
                                        ("lgb", lgb_model)])
    # voting_regressor = VotingRegressor([('xgb', xgb_model), ('lgb', lgb_model)])
    voting_regressor.fit(X_train, y_train)

    return voting_regressor
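
A hypothetical call mirroring the commented-out setup at the top of train(); get_training_dataset and the date/burn_area columns are taken from those comments, not verified:

date_split = "2013-01-01"
train_all = get_training_dataset()
train_df = train_all.loc[train_all.date < date_split]
valid_df = train_all.loc[train_all.date > date_split]
features = [c for c in train_all.columns if c not in ("date", "burn_area")]

# fit the three boosters plus the voting ensemble, then score the holdout
voting_regressor = train(features, "burn_area", train_df, valid_df)
preds = voting_regressor.predict(valid_df[features])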
Example #4
 def _get_voter(self, mode, estimators, weights=None):
     if self.configs['fit']['train_mode'] == 'clf':
         if mode == 'average':
             voting = 'soft'
         elif mode == 'vote':
             voting = 'hard'
         voter = VotingClassifier(
             estimators=estimators, voting=voting,
             weights=weights, n_jobs=-1)
     elif self.configs['fit']['train_mode'] == 'reg':
         if mode == 'average':
             voter = VotingRegressor(
                 estimators=estimators, weights=weights, n_jobs=-1)
         else:
             # only averaging is supported for regression; fail loudly
             # instead of hitting an UnboundLocalError on `voter`
             raise ValueError(f'unsupported regression mode: {mode}')
     return voter
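
A standalone sketch of the same dispatch; the configs layout is an assumption inferred from the keys the method reads:

from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

# Assumed config shape: {'fit': {'train_mode': 'clf' | 'reg'}}
configs = {'fit': {'train_mode': 'reg'}}
estimators = [('lr', LinearRegression()), ('dt', DecisionTreeRegressor())]

if configs['fit']['train_mode'] == 'reg':
    voter = VotingRegressor(estimators=estimators, n_jobs=-1)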
Example #5
 def test_model_voting_regression(self):
     # Could not find an implementation for the node Sum:Sum(8)
     model = VotingRegressor([
         ('lr', LinearRegression()),
         ('dt', SGDRegressor())])
     model, X = fit_regression_model(model)
     model_onnx = convert_sklearn(
         model, "voting regression",
         [("input", DoubleTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     dump_data_and_model(
         X.astype(np.float64), model, model_onnx,
         basename="SklearnVotingRegressorDouble",
         comparable_outputs=[0])
Example #6
def initialize(method, coef=None):
    regressor = None
    if method == "linear_regression":
        regressor = linear_model.LinearRegression(normalize=True)
    if method == "elastic_net":
        regressor = linear_model.ElasticNet(normalize=True)
    if method == "bayesian_ridge":
        regressor = linear_model.BayesianRidge(normalize=True)
    if method == "Support_Vector_Machine":
        regressor = svm.SVR()
    if method == "Decision_tree":
        regressor = tree.DecisionTreeClassifier()
    if method == "KNN":
        regressor = NearestCentroid()
    if method == "Gaussian":
        regressor = GaussianNB()
    if method == "Random_Forest":
        if coef == None:
            regressor = RandomForestRegressor(n_estimators=30)
        else:
            regressor = RandomForestRegressor(n_estimators=coef)
    else:
        if "Random_Forest" in method:
            trees = method.split("t")[1]
            regressor = RandomForestRegressor(n_estimators=int(trees))
    if method == "ensemble":
        r1 = initialize("linear_regression")
        r2 = initialize("Random_Forest")
        r3 = initialize("bayesian_ridge")
        if coef is None:
            regressor = VotingRegressor([('lr', r1), ('rf', r2), ('br', r3)])
        else:
            regressor = VotingRegressor(estimators=[('lr', r1), ('rf', r2),
                                                    ('br', r3)],
                                        weights=coef)

    return regressor
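
Hypothetical usage of initialize() on synthetic data; note the snippet targets an older scikit-learn where normalize=True is still accepted:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
y = X @ np.array([1.0, -2.0, 0.5])

# unweighted ensemble, then one weighted toward the random forest
ereg = initialize("ensemble")
ereg_w = initialize("ensemble", coef=[1, 3, 1])
ereg_w.fit(X, y)
print(ereg_w.predict(X[:3]))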
Example #7
def _regress():
    #------------Regression------------

    #knn
    knnr = KNeighborsRegressor()
    # linear (VotingRegressor only accepts regressors, so the original
    # LogisticRegression, a classifier, is swapped for LinearRegression)
    lr = LinearRegression()
    #svm
    svr = LinearSVR()
    #nn
    mlpr = MLPRegressor()
    #xgboost
    xgbr = XGBRegressor()
    #voting
    votec = VotingRegressor(
        estimators=[('knnr', knnr), ('lr', lr), ('svr', svr),
                    ('mlpr', mlpr), ('xgbr', xgbr)])
    votec = votec.fit(xtr, ytr_encoded)

    y_pred = votec.predict(xte)
    print()
    print(mean_squared_error(y_true=yte, y_pred=y_pred))
    print()
Example #8
def regression_modeling(data):
    '''Models the response rate with Voting Regression'''
    # Scaling the data
    scaled_data = preprocessing.StandardScaler().fit_transform(data)

    # Creating train-test
    X = scaled_data[:, 0:8]
    y = scaled_data[:, 8]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=42)

    #Voting Regression
    reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
    reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
    reg3 = LinearRegression()
    ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr',
                                                                    reg3)])
    ereg = ereg.fit(X_train, y_train)
    y_hat_ereg = ereg.predict(X_test)
    r2_ereg = r2_score(y_test, y_hat_ereg)
    return r2_ereg
Example #9
 def run_ensemble_run(self, model_name='Ensemble'):
     reg1 = SVR(C=10, kernel="rbf", epsilon=0.1, gamma='auto')
     reg2 = KNeighborsRegressor(n_neighbors=11)
     reg3 = RandomForestRegressor(n_estimators=100)

     # NB: only the random forest is voted on; reg1 and reg2 are unused here
     model = VotingRegressor([('RF', reg3)])
     model.fit(self.X_train, self.Y_train)

     self.evaluate_regression(self.Y_train, model.predict(self.X_train),
                              self.dates_train, model_name + '-OnTrain',
                              slicer=1)
     self.evaluate_regression(self.Y_test, model.predict(self.X_test),
                              self.dates_test, model_name + '-OnTest',
                              slicer=1)
Example #10
    def ensemble_of_best_params_xgb_reg(self, max_evals):
        best_params = self.params_to_ensemble(fn_name='xgb_reg',
                                              space=xgb_para,
                                              algo=tpe.suggest,
                                              max_evals=max_evals)

        models_to_voting = {}
        for i in range(len(best_params)):
            reg = xgb.XGBRegressor(**best_params[i])
            models_to_voting[str(i)] = reg

        model_ensemble = VotingRegressor([
            (name, model) for name, model in models_to_voting.items()
        ])

        return model_ensemble, best_params
Example #11
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor', ereg.predict,
                         X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r)
Example #12
def get_model(param: dict) -> BaseEstimator:
    model_name = param.pop('name')
    if model_name == 'xgb':
        return XGBRegressor(**param[model_name])
    elif model_name == 'lgb':
        return LGBMRegressor(**param[model_name])
    elif model_name == 'cb':
        return CatBoostRegressor(**param[model_name])
    elif model_name == 'rf':
        return RandomForestRegressor(**param[model_name])
    elif model_name == 'svm':
        return make_pipeline(StandardScaler(), SVR(**param[model_name]))
    elif model_name == 'knn':
        return make_pipeline(StandardScaler(), KNeighborsRegressor(**param[model_name]))
    elif model_name == 'mlp':
        return make_pipeline(StandardScaler(), MLPRegressor(**param[model_name]))
    elif model_name == 'vote':
        return VotingRegressor(estimators=[
            ('svm', get_model(dict(param, name='svm'))),
            ('rf', get_model(dict(param, name='rf'))),
            ('lgb', get_model(dict(param, name='lgb'))),
            ('knn', get_model(dict(param, name='knn'))),
        ])
    elif model_name == 'stack':
        model = SuperLearner(scorer=mean_squared_error, random_state=132)
        model.add([
            get_model(dict(param, name='svm')),
            get_model(dict(param, name='rf')),
            get_model(dict(param, name='lgb')),
            get_model(dict(param, name='knn')),
        ])
        model.add_meta(GradientBoostingRegressor(random_state=22))
        return model
    elif model_name == 'sk_stack':
        return StackingRegressor(
            estimators=[
                ('svm', get_model(dict(param, name='svm'))),
                ('rf', get_model(dict(param, name='rf'))),
                ('lgb', get_model(dict(param, name='lgb'))),
                ('knn', get_model(dict(param, name='knn'))),
            ],
            final_estimator=GradientBoostingRegressor(random_state=42)
        )
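
A hypothetical param layout consistent with how get_model() reads it: 'name' selects the branch and each model's kwargs live under its own key:

param = {
    'name': 'vote',
    'svm': {'C': 1.0},
    'rf': {'n_estimators': 100},
    'lgb': {'n_estimators': 200},
    'knn': {'n_neighbors': 5},
}
# VotingRegressor over the svm/rf/lgb/knn branches defined above
model = get_model(param)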
Example #13
    def ensemble_pipe(self, pipes):
        """Create a mean ensemble pipe where individual pipes feed into 
           a mean voting ensemble model.

        Args:
            pipes (list): List of pipes that will have their outputs averaged

        Returns:
            Pipeline: Pipeline object whose single step is a Voting ensemble fed by the individual pipes
        """
        ests = []
        for i, p in enumerate(pipes):
            ests.append((f'p{i}', p))

        if self.model_obj == 'reg':
            ensemble = VotingRegressor(estimators=ests)
        elif self.model_obj == 'class':
            ensemble = VotingClassifier(estimators=ests)

        return Pipeline([('ensemble', ensemble)])
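
A sketch of feeding two hypothetical pipelines into ensemble_pipe; obj stands in for the owning instance with model_obj == 'reg':

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe_a = Pipeline([('scale', StandardScaler()), ('ridge', Ridge())])
pipe_b = Pipeline([('rf', RandomForestRegressor(n_estimators=50))])

# obj.model_obj == 'reg', so the pipes are averaged by a VotingRegressor
ensemble = obj.ensemble_pipe([pipe_a, pipe_b])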
Example #14
def define_models():
    # linear regression
    reg_model = LinearRegression()
    xgb_model = XGBRegressor(colsample_bytree=0.6,
                             gamma=0.7,
                             max_depth=4,
                             objective='reg:squarederror')
    ada = AdaBoostRegressor(random_state=0, n_estimators=100)
    rf = make_pipeline(
        MinMaxScaler(),
        RandomForestRegressor(bootstrap=True,
                              max_features=0.15,
                              min_samples_leaf=6,
                              min_samples_split=16,
                              n_estimators=100))
    # rf = RandomForestRegressor(bootstrap=True, max_features=0.15, min_samples_leaf=6, min_samples_split=16, n_estimators=100)
    # svr = SVR(C=1.0, epsilon=0.2)
    er = VotingRegressor([('rf', rf), ('xgb_model', xgb_model)])

    return [reg_model, xgb_model, ada, rf, er]
Example #15
def get_estimator():
    data_merger = FunctionTransformer(_merge_external_data)
    date_encoder = FunctionTransformer(_encode_dates)
    date_cols = ["DateOfDeparture"]

    categorical_encoder = make_pipeline(
        SimpleImputer(strategy="constant", fill_value="missing"),
        OneHotEncoder(handle_unknown="ignore"))
    categorical_cols = [
        "Arrival", "Departure", "day", "weekday", "holidays", "week", "n_days"
    ]

    preprocessor = make_column_transformer(
        (categorical_encoder, categorical_cols))
    # Best parameters RandomForest
    n_estimators_rf = 1400
    min_samples_split_rf = 2
    min_samples_leaf_rf = 2
    max_features_rf = 'auto'
    max_depth_rf = 70
    bootstrap_rf = True

    # Best parameters SVR
    C_svr = 100
    gamma_svr = 0.01
    kernel_svr = 'rbf'

    rf = RandomForestRegressor(n_estimators=n_estimators_rf,
                               max_depth=max_depth_rf,
                               max_features=max_features_rf,
                               min_samples_split=min_samples_split_rf,
                               min_samples_leaf=min_samples_leaf_rf,
                               bootstrap=bootstrap_rf,
                               n_jobs=-1)

    svr = SVR(C=C_svr, gamma=gamma_svr, kernel=kernel_svr)

    regressor_voting = VotingRegressor(estimators=[('rf', rf), ("svr", svr)])

    return make_pipeline(data_merger, date_encoder, preprocessor,
                         regressor_voting)
Example #16
def get_estimator():
    '''Returns pipeline with the model to be used on the train data.'''
    # CatBoostRegressor
    boost_reg = CatBoostRegressor(n_estimators=5000, learning_rate=0.05,
                                  max_depth=6, verbose=False)
    # add regressor to the pre-processing pipeline
    # (list.append returns None, so keep the pipeline reference itself)
    pipeline_boost = preprocessor('Boost')
    pipeline_boost.steps.append(('model', boost_reg))

    # Neural Network
    nn_reg = KerasRegressor(build_fn=model.nn_model, epochs=60, batch_size=16,
                            verbose=False)
    KerasRegressor._estimator_type = "regressor"
    # add regressor to the pre-processing pipeline
    pipeline_nn = preprocessor('NN')
    pipeline_nn.steps.append(('model', nn_reg))

    # Voting regressor
    regressor = VotingRegressor(
        estimators=[('boost', pipeline_boost), ('nn', pipeline_nn)])
 
    return regressor
Example #17
def get_regressor(i):
    # default to linear regression for unrecognised keys (the original
    # returned the bare string 'linear' in that case)
    regressor = linear_model.LinearRegression()
    if i == 'linear':
        regressor = linear_model.LinearRegression()
    elif i == 'svr':
        regressor = svm.SVR()
    elif i == 'knn':
        regressor = KNeighborsRegressor()
    elif i == 'gradient_boost':
        regressor = GradientBoostingRegressor()
    elif i == 'decision_tree':
        regressor = tree.DecisionTreeRegressor()
    elif i == 'random_forest':
        regressor = RandomForestRegressor()
    elif i == 'mlp':
        regressor = MLPRegressor(random_state=1, max_iter=500)
    elif i == 'voting':
        regr = GradientBoostingRegressor()
        regr2 = tree.DecisionTreeRegressor()
        regressor = VotingRegressor(estimators=[('gb', regr), ('dt', regr2)])

    return regressor
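
Hypothetical dispatch through get_regressor() on synthetic data:

import numpy as np

X = np.random.rand(30, 2)
y = X[:, 0] - X[:, 1]

# build and fit the GradientBoosting + DecisionTree voting ensemble
model = get_regressor('voting')
model.fit(X, y)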
Example #18
def steam_learning_voting(data, NUM_FOLDS):
    """
    Voting regressor that combines different types of regressors to try and overcome their weaknesses.
    """
    X = data[["positive_ratings_", "negative_ratings_", "owners_", "average_playtime_", "median_playtime_"]]
    y = data[["price_"]]


    kfold = KFold(n_splits=NUM_FOLDS)

    gradient_boosting_model = GradientBoostingRegressor(random_state=1, n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1, n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(estimators=[('gb', gradient_boosting_model), ('rf', random_forest_model), ('lr', linear_regression_model)])
    mse_scorer = make_scorer(mean_squared_error)

    results = cross_val_score(voting_model, X, y.values.ravel(), scoring=mse_scorer, cv=kfold)
    print(f"Boosting - MSE Array: {results}")

    final_results = f"Voting - Mean MSE over {NUM_FOLDS} folds: {np.mean(results)}"
    print(final_results)
    return final_results
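
A hypothetical call with a tiny DataFrame carrying the columns the function expects:

import pandas as pd

df = pd.DataFrame({
    "positive_ratings_": [100, 5, 60, 250],
    "negative_ratings_": [10, 50, 5, 20],
    "owners_": [1000, 200, 800, 5000],
    "average_playtime_": [120, 5, 90, 300],
    "median_playtime_": [60, 2, 45, 150],
    "price_": [19.99, 0.99, 9.99, 29.99],
})
steam_learning_voting(df, NUM_FOLDS=2)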
Example #19
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this estimator.")
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingClassifier'):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % 'VotingRegressor'):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % 'VotingRegressor'):
        ereg.transform(X_r)
Example #20
def esmld():
    r1 = LinearRegression()
    #r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    r3 = SVR(kernel='rbf')

    er = VotingRegressor([
        ('lr', r1),
        #('rf', r2),
        ('svr_rbf', r3)
    ])

    # fit once and reuse the predictions instead of refitting on every call
    er.fit(X_train, y_train)
    y_pred = er.predict(X_test)
    st.write('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
    st.write('Mean Squared Error:', mean_squared_error(y_test, y_pred))
    st.write('Root Mean Squared Error:',
             np.sqrt(mean_squared_error(y_test, y_pred)))

    print(y_pred)

    st.title(y_pred)
Example #21
def main():
    df = read_df()
    #df = pd.DataFrame(df)

    x_name = "BindLevel"
    y_name = "Rank"

    # NB: X and y are built here but regression() below reads df directly
    X = df[x_name].values
    y = df[y_name].values

    regression(LinearRegression(), x_name, y_name, df)
    regression(Ridge(alpha=.5), x_name, y_name, df)
    regression(neighbors.KNeighborsRegressor(), x_name, y_name, df)
    regression(DecisionTreeRegressor(random_state=0), x_name, y_name, df)
    #regression(RANSACRegressor(random_state=0), x_name, y_name, df)
    regression(VotingRegressor([('lr', LinearRegression()), ('rf', RandomForestRegressor(n_estimators=10, random_state=1))]), x_name, y_name, df)



    # Selecting columns
    dataset = df[['BindLevel', 'Gl', 'Gp', 'Ip', 'Mixcr']]

    k_neihgbours(dataset)
Example #22
    def _get_base_ensembler(self, models):

        # When wrapping in an ensemble, parallelise at the ensemble level
        # and force n_jobs=1 on each individual model
        for model in models:
            try:
                model[1].n_jobs = 1
            except AttributeError:
                pass

            # Ensemble of des ensembles case
            if hasattr(model[1], 'estimators'):
                for estimator in model[1].estimators:
                    try:
                        estimator.n_jobs = 1
                    except AttributeError:
                        pass

        if self.spec['problem_type'] == 'regression':
            return VotingRegressor(models, n_jobs=self.spec['n_jobs'])

        return VotingClassifier(models,
                                voting='soft',
                                n_jobs=self.spec['n_jobs'])
Example #23
    def __init__(self, x_train, y_train, test_split_available=False, test_size=0.1, shuffle=True, number_of_estimator=10, estimator=None, estimators=None, random_state=None):
        if test_split_available:
            self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(x_train, y_train,
                                                                                    test_size=test_size,
                                                                                    shuffle=shuffle,
                                                                                    random_state=random_state)
        else:
            self.x_test = x_train
            self.y_test = y_train
            self.x_train = x_train
            self.y_train = y_train
        self.y_predict_test = {}
        self.y_predict_train = {}
        self.models = {'svr': SVR(), 'knn': KNeighborsRegressor(), 'tree': DecisionTreeRegressor(),
                       'logistic': LogisticRegression(), 'linear': LinearRegression(), 'ridge': Ridge(),
                       'ridgecv': RidgeCV(), 'lasso': Lasso(), 'lassolars': LassoLars(alpha=0.1),
                       'bayesian': BayesianRidge(), 'ElasticNet': ElasticNet(),
                       'TheilSenRegressor': TheilSenRegressor(),
                       'ARDRegression': ARDRegression(), 'RANSACRegressor': RANSACRegressor(),
                       'HuberRegressor': HuberRegressor(), 'randomForest': RandomForestRegressor(n_estimators=50),
                       'boost': AdaBoostRegressor(random_state=0, n_estimators=100)}

        self.estimator = self.models[estimator]
        estimators_list = []
        for i in range(len(estimators)):
            estimators_list.append((estimators[i], self.models[estimators[i]]))

        # rebuild the registry so 'bagging' and 'voting' can reference the
        # resolved self.estimator and estimators_list
        self.models = {'svr': SVR(), 'knn': KNeighborsRegressor(), 'tree': DecisionTreeRegressor(),
                       'logistic': LogisticRegression(), 'linear': LinearRegression(), 'ridge': Ridge(),
                       'ridgecv': RidgeCV(), 'lasso': Lasso(), 'lassolars': LassoLars(alpha=0.1),
                       'bayesian': BayesianRidge(), 'ElasticNet': ElasticNet(),
                       'TheilSenRegressor': TheilSenRegressor(),
                       'ARDRegression': ARDRegression(), 'RANSACRegressor': RANSACRegressor(),
                       'HuberRegressor': HuberRegressor(), 'randomForest': RandomForestRegressor(n_estimators=50),
                       'bagging': BaggingRegressor(base_estimator=self.estimator, n_estimators=number_of_estimator, max_features=0.8),
                       'voting': VotingRegressor(estimators=estimators_list), 'boost': AdaBoostRegressor(random_state=0, n_estimators=100)}
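
A hypothetical instantiation; the class name ModelSuite is invented, only the constructor signature above is assumed:

import numpy as np

rng = np.random.RandomState(42)
X = rng.rand(100, 4)
y = X.sum(axis=1)

# bag KNN regressors and vote over three of the registered models
suite = ModelSuite(X, y, test_split_available=True, test_size=0.2,
                   estimator='knn',
                   estimators=['linear', 'ridge', 'randomForest'])
voting_model = suite.models['voting'].fit(suite.x_train, suite.y_train)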
Example #24
def test_notfitted():
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()),
                    ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this estimator.")
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.transform(X_r)
Example #25
# Ensemble, different k -> 0.06736

# Final setup without ensemble -> 0.063695, with -> 0.0635..
model = neighbors.KNeighborsRegressor(n_neighbors=best_k,
                                      algorithm='kd_tree',
                                      weights='distance')
model2 = neighbors.KNeighborsRegressor(n_neighbors=int(best_k / 2),
                                       algorithm='kd_tree',
                                       weights='distance')
model3 = neighbors.KNeighborsRegressor(n_neighbors=best_k * 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model4 = neighbors.KNeighborsRegressor(n_neighbors=best_k - 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model5 = neighbors.KNeighborsRegressor(n_neighbors=best_k + 2,
                                       algorithm='kd_tree',
                                       weights='distance')
# Equal weights are the default (weights=None); they are listed explicitly here
ensemble = VotingRegressor([('m1', model), ('m2', model2), ('m3', model3),
                            ('m4', model4), ('m5', model5)],
                           weights=[1, 1, 1, 1, 1])
ensemble.fit(x_train, y_train)
# model.fit(x_train, y_train)
pred = ensemble.predict(x_test)  # make predictions on the test set
error = mean_absolute_error(y_test, pred)  # calculate error
r2 = r2_score(y_test, pred)
print('MAE: ', error)
print('R2: ', r2)
error_RMSE = math.sqrt(mean_squared_error(y_test, pred))  # calculate error
print('RMSE value is:', error_RMSE)
Example #26
def model_to_test():
    return VotingRegressor([
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor()),
    ])
Example #27
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(algorithm='brute')
knn.fit(X_train, y_train)
knn.score(X_train, y_train)
knn.score(X_test, y_test)

#votingRegressor
from sklearn.ensemble import VotingRegressor
reg1 = GradientBoostingRegressor()
reg2 = RandomForestRegressor()
reg3 = LinearRegression()
reg4 = DecisionTreeRegressor()
reg5 = KNeighborsRegressor()
reg6 = AdaBoostRegressor()
# only the first two regressors are voted on; reg3-reg6 are left unused
ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2)])
ereg = ereg.fit(X_train, y_train)
ereg.score(X_train, y_train)
ereg.score(X_test, y_test)

#predict values from voting method compare it to y_test
vote_pred = ereg.predict(X_test)
#
# mean absolute error in $
mae = mean_absolute_error(y_test, vote_pred)
print("The mean absolute error is:$", mae)
# checking r^2
from sklearn.metrics import r2_score

print("r_Score:", r2_score(y_test, vote_pred))
Example #28
pred2 = regr2.predict(X_test2).round(0)
RFRMSE2 = mse(y_test2, pred2)
print(RFRMSE2)
print("Average error on new number of hospitalizations per day:",
      round(RFRMSE2**0.5, 0))

print("XGBoost Regressor Model")
xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X_train2, y_train2)
pred3 = xgb_model.predict(X_test2).round(0)
RFRMSE3 = mse(y_test2, pred3)
print("Average error on new number of hospitalizations per day:",
      round(RFRMSE3**0.5, 0))
print(RFRMSE3)

print("VotingRegressor")
ensemble = VotingRegressor(estimators=[("rf", regr2), ("gbr", model),
                                       ("dtr", ETregr), ("xgbr", xgb_model)], )

ensemble.fit(X_train2, y_train2)
predvot = ensemble.predict(X_test2).round(0)
MSE5 = mse(y_test2, predvot)
print("Average error on new number of hospitalizations per day:",
      round(MSE5**0.5, 0))
print(MSE5)

print("VotingRegressor2")
ensemble2 = VotingRegressor(estimators=[("rf", regr), ("gbr", model)])

ensemble2.fit(X_train2, y_train2)
predvot2 = ensemble2.predict(X_test2).round(0)
MSE6 = mse(y_test2, predvot2)
print("Average error on new number of hospitalizations per day:",
Example #29
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)],
                           weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]),
                     axis=0,
                     weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)],
                                        weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
Example #30
	build_auto(GBDTLMRegressor(RandomForestRegressor(n_estimators = 7, max_depth = 6, random_state = 13), LinearRegression()), "GBDTLMAuto")
	build_auto(GBDTLMRegressor(XGBRFRegressor(n_estimators = 17, max_depth = 6, random_state = 13), ElasticNet(random_state = 13)), "XGBRFLMAuto")
	build_auto(GradientBoostingRegressor(init = None, random_state = 13), "GradientBoostingAuto")
	build_auto(HistGradientBoostingRegressor(max_iter = 31, random_state = 13), "HistGradientBoostingAuto")
	build_auto(HuberRegressor(), "HuberAuto")
	build_auto(LarsCV(cv = 3), "LarsAuto")
	build_auto(LassoCV(cv = 3, random_state = 13), "LassoAuto")
	build_auto(LassoLarsCV(cv = 3), "LassoLarsAuto")
	build_auto(LinearRegression(), "LinearRegressionAuto")
	build_auto(BaggingRegressor(LinearRegression(), max_features = 0.75, random_state = 13), "LinearRegressionEnsembleAuto")
	build_auto(OrthogonalMatchingPursuitCV(cv = 3), "OMPAuto")
	build_auto(RandomForestRegressor(n_estimators = 10, min_samples_leaf = 3, random_state = 13), "RandomForestAuto", flat = True)
	build_auto(RidgeCV(), "RidgeAuto")
	build_auto(StackingRegressor([("ridge", Ridge(random_state = 13)), ("lasso", Lasso(random_state = 13))], final_estimator = GradientBoostingRegressor(n_estimators = 7, random_state = 13)), "StackingEnsembleAuto")
	build_auto(TheilSenRegressor(n_subsamples = 31, random_state = 13), "TheilSenAuto")
	build_auto(VotingRegressor([("dt", DecisionTreeRegressor(random_state = 13)), ("knn", KNeighborsRegressor()), ("lr", LinearRegression())], weights = [3, 1, 2]), "VotingEnsembleAuto")
	build_auto(XGBRFRegressor(n_estimators = 31, max_depth = 6, random_state = 13), "XGBRFAuto")

if "Auto" in datasets:
	build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state = 13)), "TransformedDecisionTreeAuto")
	build_auto(TransformedTargetRegressor(LinearRegression(), func = numpy.log, inverse_func = numpy.exp), "TransformedLinearRegressionAuto")

def build_auto_isotonic(regressor, auto_isotonic_X, name):
	pipeline = PMMLPipeline([
		("regressor", regressor)
	])
	pipeline.fit(auto_isotonic_X, auto_y)
	pipeline.verify(auto_isotonic_X.sample(frac = 0.05, random_state = 13))
	store_pkl(pipeline, name)
	mpg = DataFrame(pipeline.predict(auto_isotonic_X), columns = ["mpg"])
	store_csv(mpg, name)
Example #31
 res, lgb_m = run_regression(lgb_m, t_list, 'LightGBM_grid_search', res)
 params['lgb_grid'] = lgb_m.get_params()
 xgb_m = grid_fit(xgb_m, xgb_range, t_list)
 res, xgb_m = run_regression(xgb_m, t_list, 'XGboost_grid_search', res)
 params['xgb_grid'] = xgb_m.get_params()
 vr_range = {
     'rf__max_depth': [18, 22],
     'lgb__n_estimators': [32, 40],
     'lgb__num_leaves': [30, 40],
     'xgb__n_estimators': [480, 520]
 }
 lgb_init['learning_rate'] = 0.15
 xgb_init['learning_rate'] = 0.1
 rf_init['min_samples_split'] = 10
 rf, lgb_m, xgb_m = regen_model(rf_params, lgb_params, xgb_params)
 hybrid_m = VotingRegressor([('rf', rf), ('lgb', lgb_m), ('xgb', xgb_m)])
 res, hybrid_m = run_regression(hybrid_m, t_list, 'hybrid_regression', res)
 rf, lgb_m, xgb_m = regen_model(rf_init, lgb_init, xgb_init)
 hybrid_m = VotingRegressor([('rf', rf), ('lgb', lgb_m), ('xgb', xgb_m)])
 hybrid_m = grid_fit(hybrid_m, vr_range, t_list)
 res, hybrid_m = run_regression(hybrid_m, t_list,
                                'hybrid_regression_grid_search', res)
 params['vr_grid'] = {
     x[0]: x[1].get_params()
     for x in hybrid_m.get_params()['estimators']
 }
 rf, lgb_m, xgb_m = regen_model(rf_params, lgb_params, xgb_params)
 stack_m = StackingRegressor(estimators=[('rf', rf), ('lgb', lgb_m)],
                             final_estimator=xgb_m)  #('xgb', xgb_m))
 res, stack_m = run_regression(stack_m, t_list, 'stack_generation', res)
 rf, lgb_m, xgb_m = regen_model(rf_init, lgb_init, xgb_params)