Ejemplo n.º 1
0
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    mean_reg = DummyRegressor(strategy='mean')
    median_reg = DummyRegressor(strategy='median')
    quantile_reg = DummyRegressor(strategy='quantile', quantile=.2)
    estimators = [('mean', mean_reg), ('median', median_reg),
                  ('quantile', quantile_reg)]
    weights = [1, 2, 10]
    ereg = VotingRegressor(estimators, weights=weights)

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    member_preds = [
        reg.fit(X_r_train, y_r_train).predict(X_r_test)
        for reg in (mean_reg, median_reg, quantile_reg)
    ]
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    # The ensemble output must equal the weighted mean of its members.
    avg = np.average(np.asarray(member_preds), axis=0, weights=weights)
    assert_almost_equal(ereg_pred, avg, decimal=2)

    # weights=None must be equivalent to uniform weights.
    ereg_weights_none = VotingRegressor(estimators, weights=None)
    ereg_weights_equal = VotingRegressor(estimators, weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
Ejemplo n.º 2
0
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    base_regressors = {
        'mean': DummyRegressor(strategy='mean'),
        'median': DummyRegressor(strategy='median'),
        'quantile': DummyRegressor(strategy='quantile', quantile=.2),
    }
    named = list(base_regressors.items())
    ereg = VotingRegressor(named, weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    preds = []
    for _, reg in named:
        preds.append(reg.fit(X_r_train, y_r_train).predict(X_r_test))
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    # Ensemble prediction equals the weighted average of member outputs.
    avg = np.average(np.asarray(preds), axis=0, weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    # Omitting weights must behave like explicit uniform weighting.
    ereg_weights_none = VotingRegressor(named, weights=None)
    ereg_weights_equal = VotingRegressor(named, weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
Ejemplo n.º 3
0
 def run_ensemble_run(self, model_name = 'Ensemble'):
     """Fit a VotingRegressor (currently containing only the random
     forest member) on the training split and report train/test
     regression metrics via evaluate_regression.
     """
     # NOTE(review): an SVR and a KNeighborsRegressor were constructed
     # here but never added to the VotingRegressor, so they have been
     # removed as dead code.  Re-add them to the estimator list if the
     # full three-member ensemble is intended.
     reg3 = RandomForestRegressor(n_estimators = 100)

     model = VotingRegressor([('RF', reg3)])
     model.fit(self.X_train, self.Y_train)

     self.evaluate_regression(self.Y_train, model.predict(self.X_train), self.dates_train, model_name+'-OnTrain', slicer = 1)
     self.evaluate_regression(self.Y_test, model.predict(self.X_test), self.dates_test, model_name+'-OnTest', slicer = 1)
Ejemplo n.º 4
0
def trail_main():
    """Fit every base model on each training dataframe, print per-model
    test MSE/score, then fit a VotingRegressor over all fitted models
    and print its test metrics as well."""
    n_folds = 10  # NOTE(review): unused in this function — confirm intent
    train_path = 'data/assign3_students_train.txt'
    test_path = 'data/assign3_students_test.txt'
    train_data = read_process_data(train_path)
    test_data = read_process_data(test_path)
    models_dict = get_models()
    scores_dict = {}  # NOTE(review): never populated; kept for parity
    learned_models_dict = {}
    for df_key, df_val in train_data.items():
        # Build the train/test split with the 15 best features.
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)
        voting_list = []
        for model_key, model_val in models_dict.items():
            model = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = model
            voting_list.append((name, model))
            #         print(f"{name}, Train MSE ", mean_squared_error(y_train, model.predict(X_train)))
            #         print(f"{name}, Train RScore ", r2_score(y_train, model.predict(X_train)))
            #         print(f"{name}, Test RScore ", r2_score(y_test, model.predict(X_test)))
            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ",
                  mean_squared_error(y_test, model.predict(X_test)))
            print(f"{name}, Test Score", model.score(X_test, y_test))
            print('=' * 75, '\n')
        # Voting ensemble over all base models, refitted on the same split.
        model = VotingRegressor(voting_list)
        model = model.fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ",
              mean_squared_error(y_test, model.predict(X_test)))
        print(f"{df_key}, Voting Test Score", model.score(X_test, y_test))
        print('=' * 75, '\n\n')
Ejemplo n.º 5
0
def voting_predictions(data, base_models, val=True):
    """Fit a VotingRegressor built from *base_models* and return its
    test-set predictions.

    Parameters
    ----------
    data : the (Xtrain, Xtest, y) triple accepted by ``data_copy``.
    base_models : iterable of (model_class, params_dict) pairs.
    val : when True, also run k-fold cross-validation on the ensemble.

    Returns
    -------
    Test-set predictions mapped back from log space with ``np.exp``.
    """
    data = data_copy(data)
    Xtrain, Xtest, y = data

    vote_params = []
    for index, base_model in enumerate(base_models):
        model_cls = base_model[0]
        params = base_model[1]
        vote_params.append(('model' + str(index), model_cls(**params)))

    votemodel = VotingRegressor(vote_params)
    votemodel.fit(Xtrain, y)
    y_pred = votemodel.predict(Xtest)
    # BUG FIX: np.exp was previously applied twice (before and after the
    # optional cross-validation), double-exponentiating the predictions.
    # It is now applied exactly once.
    y_pred = np.exp(y_pred)

    if val:
        k_fold_crossval(data, model=votemodel)

    return y_pred
Ejemplo n.º 6
0
    def voting_regressor(self):
        """Fit a GBR/RF/LR voting ensemble, persist the fitted model with
        joblib, and pickle the test-set predictions under a directory
        named after the member models."""
        estimators_num = 10
        regs = {
            'GBR': GradientBoostingRegressor(
                random_state=1, n_estimators=estimators_num),
            'RF': RandomForestRegressor(
                random_state=1, n_estimators=estimators_num,
                n_jobs=-1),
            'LR': LinearRegression(),
        }
        # Build the estimator list and name directly; the old loop's
        # enumerate index was unused and the trailing-underscore trim is
        # handled by str.join.
        ereg_estimators = list(regs.items())
        ereg_name = '_'.join(regs)

        ereg = VotingRegressor(estimators=ereg_estimators,
                               n_jobs=-1)
        ereg.fit(self.X_train, self.y_train)
        y_pred = ereg.predict(self.X_test)

        root_dir = ('/Users/lujingze/Programming/SWFusion/'
                    'regression/tc/lightgbm/model/')
        ereg_dir = f'{root_dir}{ereg_name}/'
        os.makedirs(ereg_dir, exist_ok=True)

        dump(ereg, f'{ereg_dir}voting_model.joblib')

        with open(f'{ereg_dir}test_pred.pkl', 'wb') as f:
            pickle.dump(y_pred, f)
Ejemplo n.º 7
0
def rainfall_runoff(precip_file, delineated_file, discharge_file, plot_fname):
    """Fit a Bagging + RandomForest voting regressor on basin rainfall
    data and save a residual scatter plot to *plot_fname*.

    precip_file / delineated_file are .mat inputs; discharge_file is .xls.
    """
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']

    # Discharge records come from the first sheet; NaNs become zeros.
    discharge_df = pd.ExcelFile(discharge_file).parse(0).fillna(0)

    basin_num = 5
    rf_member = RandomForestRegressor(n_estimators=100, random_state=42)
    bag_member = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', bag_member), ('rf', rf_member)])

    X, y = get_data(discharge_df, basin_num, precip_mat,
                    basin_mat_delineated, False)
    voting_reg.fit(X, y)
    y_pred = voting_reg.predict(X)

    # Residual plot: predicted value vs. prediction error.
    plt.scatter(y_pred, y_pred - y, c='r')
    plt.title("Runoff prediction data using a voting-regressor")
    plt.xlabel("Predicted Output")
    plt.ylabel("Error in prediction")
    print(plot_fname)
    plt.savefig(plot_fname)
Ejemplo n.º 8
0
    def reg_fit_predict(self,
                        x_train,
                        x_test,
                        y_train,
                        y_test,
                        est_name,
                        report_flg=True):
        """Fit the named estimator (or a voting ensemble for 'vote'),
        predict on x_test, optionally report scores, and register the
        fitted model in self.regression_estimators."""
        if est_name == 'vote':
            if len(self.regression_estimators) > 1:
                print(self.regression_estimators.items())
                # FIX: VotingRegressor expects a list of (name, estimator)
                # tuples; passing the dict_items view directly is not a
                # valid `estimators` argument.
                model = VotingRegressor(
                    estimators=list(self.regression_estimators.items()))
            else:
                print('Caution: No models')
                return
        else:
            model = self.base_regression_estimators[est_name]

        model.fit(x_train, y_train)

        # predict test data
        y_pred = model.predict(x_test)

        # report scores (truthiness check instead of `== True`)
        if report_flg:
            self.reg_score_report(y_test, y_pred)

        # add model to dict
        self.regression_estimators[est_name] = model
        return
Ejemplo n.º 9
0
def vote_prediction(X_train, X_test, y_train, y_test, alpha, l1_ratio,
                    n_estimators, max_depth, c, gamma):
    """Fit a RandomForest + SVR voting ensemble and evaluate it on the
    test split.  (alpha and l1_ratio are accepted for interface
    compatibility but are not used in this body.)"""
    print("******************* VOTING ******************", end="\n\n")

    rf_member = RandomForestRegressor(n_estimators=n_estimators,
                                      max_depth=max_depth)
    svr_member = SVR(kernel='rbf', C=c, gamma=gamma)

    ensemble = VotingRegressor(estimators=[('rf', rf_member),
                                           ('svr', svr_member)])
    ensemble = ensemble.fit(X_train, y_train)

    predictions = ensemble.predict(X_test)
    evaluate('Voting', y_test, predictions, write_predictions=True)

    print("\n*********************************************", end="\n\n")
Ejemplo n.º 10
0
def model_fit_save(train_x, train_y, test_x, test_y):
    """Train a LinearRegression + RBF-SVR voting ensemble, print error
    metrics on the held-out data, and persist the fitted model."""
    ensemble = VotingRegressor([
        ('lr', LinearRegression()),
        ('svr_rbf', SVR(kernel='rbf')),
    ])
    ensemble.fit(train_x, train_y)

    # Evaluate on the held-out (test) data.
    y_pred = ensemble.predict(test_x)
    print('Mean Absolute Error:', mean_absolute_error(test_y, y_pred))
    print('Mean Squared Error:', mean_squared_error(test_y, y_pred))
    print('Root Mean Squared Error:',
          np.sqrt(mean_squared_error(test_y, y_pred)))

    # Save the fitted model as a pickle for later reuse.
    joblib.dump(ensemble, 'model.pkl')
Ejemplo n.º 11
0
    def ensemble_lgb_regressor(self):
        """Load previously saved per-loss models, combine them in a
        VotingRegressor, fit it, and pickle the test-set predictions."""
        try:
            root_dir = ('/Users/lujingze/Programming/SWFusion/'
                        'regression/tc/lightgbm/model/')
            model_dir = {
                'SG-FL': (f"""{root_dir}na_101.845662_fl_smogn_"""
                          f"""final_threshold_square_2/"""),
                'MSE': f'{root_dir}na_2.188733/',
            }
            er_name = ''
            estimators = []
            for name, out_dir in model_dir.items():
                er_name += f'{name}_'
                # Each model directory must contain exactly one pickled
                # Bunch for this basin.
                save_file = [f for f in os.listdir(out_dir)
                             if f.endswith('.pkl')
                             and f.startswith(f'{self.basin}')]
                if len(save_file) != 1:
                    self.logger.error('Count of Bunch is not ONE')
                    exit(1)

                with open(f'{out_dir}{save_file[0]}', 'rb') as f:
                    best_result = pickle.load(f)

                estimators.append((name, best_result.model))

            er_name = er_name[:-1]  # drop the trailing underscore
            er = VotingRegressor(estimators)
            er.fit(self.X_train, self.y_train)

            # FIXES: the output path previously stripped er_name twice
            # (er_name[:-1] again after the underscore was already
            # removed), referenced an undefined name ``er_dir``
            # (NameError), and called ``.to_pickle`` on a numpy array,
            # which has no such method.
            er_dir = f'{root_dir}{er_name}/'
            os.makedirs(er_dir, exist_ok=True)
            y_pred = er.predict(self.X_test)
            with open(f'{er_dir}y_pred.pkl', 'wb') as f:
                pickle.dump(y_pred, f)
        except Exception as msg:
            breakpoint()
            exit(msg)
    def voting_compile_fit(self):
        """Fit a VotingRegressor ``self.n_repetition`` times and keep the
        candidate with the lowest test MSE in ``self.voting_reg``."""
        best_mse = None

        for rep in range(self.n_repetition):
            candidate = VotingRegressor(estimators=self.reg_models)
            candidate.fit(self.X_train, self.y_train.values.ravel())
            y_pred = candidate.predict(self.X_test)
            mse = mean_squared_error(self.y_test, y_pred)

            print(rep + 1, ". ", "Voting_reg", mse / 1000000)

            # Keep the first candidate, then any strictly better one.
            if best_mse is None or mse < best_mse:
                self.voting_reg = candidate
                best_mse = mse
Ejemplo n.º 13
0
    def train(self):
        """Train a KNN / RandomForest / AdaBoost voting regressor on the
        generated dataframe, print its score and MAE/MAPE, pickle the
        fitted model, and return the MAPE (percent)."""
        self.gripperjack = self.gripperjack[0]
        self.location = self.location[0]
        generator = pg.generator_factory(self.type)

        self.df: pd.DataFrame = generator.generate(self.gripperjack,
                                                   self.location, 1)
        print(self.df.columns)
        self.df = self.df.drop(columns=['Timestamp']).dropna()

        print('DATAFRAME IS LOADED IN')
        # (A block of ``x = None`` / ``y_train = None`` placeholder
        # assignments used to live here; every name was reassigned before
        # first use, so they were dead code and have been removed.)

        # 'next' is the prediction target; the remaining columns are
        # features.
        y = self.df.pop('next')
        x = self.df

        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            shuffle=True)

        r = [('K Neighbour Regressor',
              KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
             ('Random Forrest Regressor',
              RandomForestRegressor(n_estimators=200, n_jobs=5)),
             ('Ada Regressor',
              AdaBoostRegressor(n_estimators=100, learning_rate=0.1))]

        # The random forest dominates the vote (weight 1 vs. 0.1).
        regressor = VotingRegressor(r, weights=[0.1, 1, 0.1])

        regressor.fit(x_train, y_train)
        print('===================')
        print('SCORE X/Y TEST')
        print(regressor.score(x_test, y_test))
        dump_location = 'Recources\\regressor_dumps\\' + self.type + '\\' + str(
            self.gripperjack) + '\\' + self.location

        print('==================')
        print('ACCURACY')
        y_pred = regressor.predict(x_test)
        mae = metrics.mean_absolute_error(y_test, y_pred)
        # "MAPE" here is MAE normalised by the target range, in percent.
        mape = (mae / (y.max() - y.min())) * 100
        print('MAE')
        print(mae)
        print('MAPE')
        print(mape)

        if not os.path.exists(dump_location):
            os.makedirs(dump_location)
        # FIX: the file handle from ``pickle.dump(..., open(...))`` was
        # never closed; use a context manager instead.
        with open(dump_location + '\\regressor.sav', 'wb') as dump_file:
            pickle.dump(regressor, dump_file)
        return mape
Ejemplo n.º 14
0
def test_notfitted():
    """Unfitted voting estimators must raise NotFittedError everywhere."""
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this estimator.")
    # (class name for the message, bound method, input data)
    cases = [('VotingClassifier', eclf.predict, X),
             ('VotingClassifier', eclf.predict_proba, X),
             ('VotingClassifier', eclf.transform, X),
             ('VotingRegressor', ereg.predict, X_r),
             ('VotingRegressor', ereg.transform, X_r)]
    for cls_name, method, data in cases:
        with pytest.raises(NotFittedError, match=msg % cls_name):
            method(data)
class VotingAggregationMethod(AggregationMethod):
    """Aggregation method backed by a five-member voting regressor."""

    def __init__(self, config, train_values, train_labels, test_values,
                 logger):
        super().__init__(config, train_values, train_labels, test_values,
                         logger)
        # NOTE(review): the member named 'random_forest' is actually an
        # SVR — the label looks misleading; confirm before renaming.
        members = [
            ('random_forest', SVR(kernel='rbf', gamma=0.1)),
            ('krr', KernelRidge(kernel='rbf', gamma=0.1)),
            ('ada', AdaBoostRegressor()),
            ('rf', RandomForestRegressor()),
            ('et', ExtraTreesRegressor()),
        ]
        self.model = VotingRegressor(members)

    def train_model(self, values, labels):
        """Fit the ensemble and return predictions on the training values."""
        self.model = self.model.fit(values, labels)
        return self.model.predict(values)

    def test_model(self, values):
        """Return predictions of the (already fitted) ensemble."""
        return self.model.predict(values)
Ejemplo n.º 16
0
def test_notfitted():
    """Calling any estimator method before fit() raises NotFittedError."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()),
                    ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this estimator.")

    def check(method, data, cls_name):
        # Each call must fail with NotFittedError naming the class.
        with pytest.raises(NotFittedError, match=msg % cls_name):
            method(data)

    check(eclf.predict, X, "VotingClassifier")
    check(eclf.predict_proba, X, "VotingClassifier")
    check(eclf.transform, X, "VotingClassifier")
    check(ereg.predict, X_r, "VotingRegressor")
    check(ereg.transform, X_r, "VotingRegressor")
def train_voting_regressor(algos):
    """Fit a VotingRegressor over *algos* on the module-level training
    split and return (model, r2, mae) measured on the test split."""
    ensemble = VotingRegressor(algos)
    ensemble.fit(X_train, y_train)

    predictions = ensemble.predict(X_test1)

    return (ensemble,
            r2_score(y_test1, predictions),
            mean_absolute_error(y_test1, predictions))
Ejemplo n.º 18
0
def regression_modeling(data, model):
    """Scale *data*, split train/test, and evaluate the requested model.

    Parameters
    ----------
    data : array-like whose first 8 columns are features and 9th is the
        target (after standard scaling).
    model : one of 'all', 'lasso', 'decision_tree', 'ridge', 'svm',
        'voting'.  All branches print R^2 except 'voting', which
        returns it.
    """
    # Scaling the data
    scaled_data = preprocessing.StandardScaler().fit_transform(data)

    # Creating train-test
    X = scaled_data[:, 0:8]
    y = scaled_data[:, 8]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    if model == 'all':
        # FIX: these four models were previously fitted unconditionally,
        # wasting work whenever a single-model branch was requested.
        models = [
            ['linear', linear_model.Lasso(alpha=0.1).fit(X_train, y_train)],
            ['decision_tree',
             DecisionTreeRegressor(random_state=0).fit(X_train, y_train)],
            ['ridge', linear_model.Ridge(alpha=.5).fit(X_train, y_train)],
            ['svm',
             svm.SVR(kernel='rbf', gamma='auto').fit(X_train, y_train)],
        ]
        for m in models:
            y_predicted = m[1].predict(X_test)
            r2 = r2_score(y_test, y_predicted)
            print("{}: {}".format(m[0], r2))

    if model == "lasso":
        lasso_reg = linear_model.Lasso(alpha=0.1).fit(X_train, y_train)
        y_hat_lasso = lasso_reg.predict(X_test)
        r2_lasso = r2_score(y_test, y_hat_lasso)
        print("R^2 score for Lasso:", r2_lasso)

    elif model == "decision_tree":
        dt_reg = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
        y_hat_dt = dt_reg.predict(X_test)
        r2_dt_reg = r2_score(y_test, y_hat_dt)
        print("R^2 score for Decision tree:", r2_dt_reg)

    elif model == "ridge":
        ridge_reg = linear_model.Ridge(alpha=.5).fit(X_train, y_train)
        y_hat_ridge = ridge_reg.predict(X_test)
        r2_ridge = r2_score(y_test, y_hat_ridge)
        print("R^2 score for Ridge:", r2_ridge)

    elif model == "svm":
        svm_reg = svm.SVR(kernel='rbf').fit(X_train, y_train)
        y_hat_svm = svm_reg.predict(X_test)
        r2_svm = r2_score(y_test, y_hat_svm)
        print("R^2 score for RBF SVM:", r2_svm)

    elif model == "voting":
        reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
        reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
        reg3 = LinearRegression()
        ereg = VotingRegressor(
            estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
        ereg = ereg.fit(X_train, y_train)
        y_hat_ereg = ereg.predict(X_test)
        r2_ereg = r2_score(y_test, y_hat_ereg)
        return r2_ereg
Ejemplo n.º 19
0
def get_training_goals(X, y, X_test):
    """Ensemble learning: blend three polynomial-regression pipelines
    with fixed weights and return predictions for X_test."""
    members = [
        ('rf_ploy', forest_polynomialregression(degree=3)),
        ('gb_ploy', gb_polynomialregression(degree=3)),
        ('ls_ploy', ls_polynomialregression(degree=3)),
    ]
    blender = VotingRegressor(estimators=members, weights=[0.2, 0.6, 0.2])
    blender.fit(X, y)
    return blender.predict(X_test)
Ejemplo n.º 20
0
def vote_prediction_standalone(X_train, X_test, y_train, y_test):
    """Build a voting ensemble whose members are hyper-parameter searches
    (RandomizedSearchCV / GridSearchCV) and evaluate it on the test
    split via ``evaluate``."""
    print("******************* VOTING ******************", end="\n\n")

    param_dist = {'n_estimators': range(10, 320), 'max_depth': range(2, 50)}

    forest = RandomForestRegressor()
    rscv = RandomizedSearchCV(forest,
                              param_dist,
                              cv=10,
                              n_iter=100,
                              scoring=scoring,
                              n_jobs=JOBS,
                              verbose=1)

    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gammas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
    param_grid = {'C': Cs, 'gamma': gammas}

    svr = GridSearchCV(SVR(kernel='rbf'),
                       param_grid,
                       cv=10,
                       scoring=scoring,
                       n_jobs=JOBS,
                       verbose=1)

    param_dist_en = {'alpha': stats.expon(0, 1), 'l1_ratio': stats.expon(0, 1)}

    enet = ElasticNet()
    model_cv = RandomizedSearchCV(enet,
                                  param_dist_en,
                                  cv=10,
                                  n_iter=100,
                                  scoring=scoring,
                                  n_jobs=JOBS,
                                  verbose=1)

    # FIX: the ElasticNet hyper-parameter search ``model_cv`` was built
    # but never used — the raw, un-tuned ElasticNet was passed instead.
    voting_regressor = VotingRegressor(estimators=[
        ('rf', rscv),
        ('enet', model_cv),
        ('svr', svr)
    ])
    voting_regressor = voting_regressor.fit(X_train, y_train)

    y_pred = voting_regressor.predict(X_test)

    evaluate('Voting', y_test, y_pred, write_predictions=True)

    print("\n*********************************************", end="\n\n")
def steam_voting_predict_learned(data):
    """
    Runs the voting model with the values to predict already being in the
    model, returning the in-sample MSE.
    """
    feature_cols = ["positive_ratings_", "negative_ratings_", "owners_",
                    "average_playtime_", "median_playtime_"]
    features = data[feature_cols]
    target = data[["price_"]]

    ensemble = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=20)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=20)),
        ('lr', linear_model.LinearRegression()),
    ])
    ensemble.fit(features, target.values.ravel())

    in_sample_preds = ensemble.predict(features)
    mse = mean_squared_error(target, in_sample_preds)
    return np.mean(mse)
def steam_best_model_test(data):
    """
    Fits the best model with 90% of our data then predicts on the
    remaining 10%.  This simulates a "Real world situation".
    Returns the resulting test MSE.
    """
    feature_cols = ["positive_ratings_", "negative_ratings_", "owners_",
                    "average_playtime_", "median_playtime_"]
    X_train, X_test, y_train, y_test = train_test_split(
        data[feature_cols], data[["price_"]],
        test_size=0.1, random_state=2)

    voting_model = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=20)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=20)),
        ('lr', linear_model.LinearRegression()),
    ])
    voting_model.fit(X_train, y_train.values.ravel())

    preds = voting_model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    return np.mean(mse)
Ejemplo n.º 23
0
def get_flow(precip_file, delineated_file, discharge_file, D, T, file_name_b4_reg, file_name_after_reg):
    """Plot DQT flow statistics before and after regression-based
    discharge correction.

    precip_file / delineated_file are .mat inputs; discharge_file is
    .xls.  D and T are the number of days / years in the DQT format.
    """
    # give precipitation data and delineated watershed data as input
    # inputs should be .mat only
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    print(basin_mat_delineated.shape)

    # read discharge data as .xls input
    discharge_df = pd.ExcelFile(discharge_file)
    discharge_df = discharge_df.parse(0)
    discharge_df = discharge_df.fillna(0) # Replace the nan values with 0's

    all_datetimes = discharge_df['Date']
    all_years = list(map(lambda datetime_obj: int(datetime_obj.date().strftime("%Y")), all_datetimes))
    # FIX: list(set(...)) has no guaranteed order, yet years_list[0] and
    # years_list[-1] are used below as min/max year bounds — sort it.
    years_list = sorted(set(all_years))

    discharge_df["Year"] = all_years

    # num days is D and num_years is T in the DQT format
    # D,T are USER INPUTS
    num_days = int(D)
    num_years = int(T)

    gather_dqt_plot(0, discharge_df, years_list, num_days, num_years, file_name_b4_reg)

    basin_num = 5
    reg1 = RandomForestRegressor(n_estimators=100, random_state=42)
    reg4 = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', reg4), ('rf', reg1)])

    X, y = get_data(discharge_df, basin_num, precip_mat, basin_mat_delineated, False)
    voting_reg.fit(X, y)

    # Re-predict discharge over the full year range and plot again.
    new_discharge_df = deepcopy(discharge_df)
    new_discharge_df = new_discharge_df[(new_discharge_df["Year"] >= years_list[0]) & (new_discharge_df["Year"] <= years_list[-1])]
    print(len(discharge_df['Year']), len(new_discharge_df["Year"]))

    X, y = get_data(new_discharge_df, basin_num, precip_mat, basin_mat_delineated, True)
    y_pred = voting_reg.predict(X)
    new_discharge_df["New_Discharge"] = y_pred

    gather_dqt_plot(1, new_discharge_df, years_list, num_days, num_years, file_name_after_reg)
    def test_onnxt_iris_voting_regressor(self):
        """ONNX round-trip: VotingRegressor predictions on iris must match
        the Python ONNX runtime's output within 6 decimals."""
        iris = load_iris()
        X, y = iris.data, iris.target
        # Regression on the float-cast class labels.
        y = y.astype(numpy.float32)
        X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
        clr = VotingRegressor(estimators=[(
            'lr',
            LinearRegression()), ('dt', DecisionTreeRegressor(max_depth=2))])
        clr.fit(X_train, y_train)
        X_test = X_test.astype(numpy.float32)
        # Keep only 4 head and 4 tail rows to keep the check fast.
        X_test = numpy.vstack([X_test[:4], X_test[-4:]])
        res0 = clr.predict(X_test).astype(numpy.float32)

        model_def = to_onnx(clr, X_train.astype(numpy.float32))

        # Run the converted model with the pure-Python runtime and compare.
        oinf = OnnxInference(model_def, runtime='python')
        res1 = oinf.run({'X': X_test})
        regs = DataFrame(res1['variable']).values
        self.assertEqualArray(res0, regs.ravel(), decimal=6)
Ejemplo n.º 25
0
    def voting(self, X, y, models, select=False):
        """
        Voting Regressor: fit on a 65/35 split, report MAE on the
        inverse-scaled test targets, plot the results, and return the
        fitted ensemble.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=1, test_size=0.35)

        ensemble = VotingRegressor(models)
        ensemble.fit(X_train, y_train.ravel())

        # Undo the target scaling before computing the error metric.
        predictions = self.y_scaler.inverse_transform(
            ensemble.predict(X_test))
        y_test = self.y_scaler.inverse_transform(y_test)

        mae = mean_absolute_error(y_test, predictions)

        self.plot_results(y_test, predictions, 'Voting Regressor', mae,
                          select)

        return ensemble
Ejemplo n.º 26
0
    def reg_fit_predict_cv(self, est_name, x=None, y=None, report_flg=True):
        """5-fold CV fit/predict for the named estimator ('vote' builds a
        VotingRegressor from self.regression_estimators).  Prints the
        mean of each score and returns the predictions of the LAST fold.

        NOTE(review): ``report_flg`` is accepted for interface parity
        with reg_fit_predict but is not used here.
        """
        if est_name == 'vote':
            if len(self.regression_estimators) > 1:
                print(self.regression_estimators.items())
                # FIX: VotingRegressor expects a list of (name, estimator)
                # tuples, not a dict_items view.
                model = VotingRegressor(
                    estimators=list(self.regression_estimators.items()))
            else:
                print('Caution: No models')
                return
        else:
            model = self.base_regression_estimators[est_name]

        scores_list = {
            'mae': [],
            'mae_mean': [],
            'rmse': [],
            'rmse_mean': [],
        }

        for train_idx, test_idx in KFold(n_splits=5).split(x, y):
            x_train = x.loc[train_idx, :]
            y_train = y[train_idx]
            x_test = x.loc[test_idx, :]
            y_test = y[test_idx]

            model.fit(x_train, y_train)

            # predict test data
            y_pred = model.predict(x_test)
            scores = self.calc_reg_scores(y_test, y_pred)

            for k in scores_list.keys():
                scores_list[k].append(scores[k])

        for k in scores_list.keys():
            print(k + ': %.4f' % np.mean(scores_list[k]))

        # # add model to dict
        # self.regression_estimators[est_name] = model
        return y_pred
Ejemplo n.º 27
0
class BindingModel:
    """Voting ensemble of random-forest, linear, Bayesian-ridge and
    gradient-boosting regressors."""

    model = None

    def __init__(self, n_jobs=-1, verbose=False, random_state=None):
        member_list = [
            ('rf', RandomForestRegressor(max_depth=3,
                                         random_state=random_state)),
            ('lr', LinearRegression()),
            ('br', BayesianRidge()),
            ('gb', GradientBoostingRegressor(max_depth=4,
                                             random_state=random_state)),
        ]
        self.estimators = member_list
        self.model = VotingRegressor(member_list,
                                     n_jobs=n_jobs,
                                     verbose=verbose)

    def fit(self, X, y):
        """Fit the underlying voting ensemble."""
        self.model.fit(X, y)

    def predict(self, X):
        """Predict with the fitted ensemble."""
        return self.model.predict(X)
def plot_voting_regressor():
    """Fit three regressors plus their voting average on the diabetes
    dataset and plot the first 20 predictions of each."""
    X, y = load_diabetes(return_X_y=True)

    # Train classifiers
    gb = GradientBoostingRegressor(random_state=1)
    rf = RandomForestRegressor(random_state=1)
    lr = LinearRegression()
    for member in (gb, rf, lr):
        member.fit(X, y)

    ensemble = VotingRegressor([('gb', gb), ('rf', rf), ('lr', lr)])
    ensemble.fit(X, y)

    # Making predictions on the first 20 training samples.
    xt = X[:20]
    predictions = [m.predict(xt) for m in (gb, rf, lr, ensemble)]

    # Plot the results
    plt.figure()
    plt.plot(predictions[0], 'gd', label='GradientBoostingRegressor')
    plt.plot(predictions[1], 'b^', label='RandomForestRegressor')
    plt.plot(predictions[2], 'ys', label='LinearRegression')
    plt.plot(predictions[3], 'r*', ms=10, label='VotingRegressor')

    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.ylabel('predicted')
    plt.xlabel('training samples')
    plt.legend(loc="best")
    plt.title('Regressor predictions and their average')

    plt.show()
Ejemplo n.º 29
0
def _regress():
    """Fit a five-member voting regressor on the module-level training
    data and print the test-set MSE.

    NOTE(review): LogisticRegression is a classifier; including it in a
    VotingRegressor is unusual — confirm this is intended.
    """
    members = [
        ('knnr', KNeighborsRegressor()),
        ('lr', LogisticRegression()),
        ('svr', LinearSVR()),
        ('mlpr', MLPRegressor()),
        ('xgbr', XGBRegressor()),
    ]
    votec = VotingRegressor(estimators=members)
    votec = votec.fit(xtr, ytr_encoded)

    y_pred = votec.predict(xte)
    print()
    print(mean_squared_error(y_true=yte, y_pred=y_pred))
    print()
Ejemplo n.º 30
0
def regression_modeling(data):
    '''Models the response rate with Voting Regression'''
    # Standardise all columns together.
    scaled = preprocessing.StandardScaler().fit_transform(data)

    # First eight scaled columns are features, the ninth is the target.
    features = scaled[:, 0:8]
    target = scaled[:, 8]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42)

    # Voting Regression over gradient boosting, random forest and OLS.
    ensemble = VotingRegressor(estimators=[
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=10)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=10)),
        ('lr', LinearRegression()),
    ])
    ensemble = ensemble.fit(X_train, y_train)
    return r2_score(y_test, ensemble.predict(X_test))
Ejemplo n.º 31
0
#Ensemble, different k -> 0.06736

## final setup without ensemble-> 0.063695 , with -> 0.0635..
# Five KNN regressors differing only in n_neighbors, spread around
# best_k; all use kd-tree search with distance-weighted neighbours.
model = neighbors.KNeighborsRegressor(n_neighbors=best_k,
                                      algorithm='kd_tree',
                                      weights='distance')
model2 = neighbors.KNeighborsRegressor(n_neighbors=int(best_k / 2),
                                       algorithm='kd_tree',
                                       weights='distance')
model3 = neighbors.KNeighborsRegressor(n_neighbors=best_k * 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model4 = neighbors.KNeighborsRegressor(n_neighbors=best_k - 2,
                                       algorithm='kd_tree',
                                       weights='distance')
model5 = neighbors.KNeighborsRegressor(n_neighbors=best_k + 2,
                                       algorithm='kd_tree',
                                       weights='distance')
# Uniformly-weighted voting ensemble over the five KNN models.
ensemble = VotingRegressor([('m1', model), ('m2', model2), ('m3', model3),
                            ('m4', model4), ('m5', model5)],
                           weights=[1, 1, 1, 1, 1])
ensemble.fit(x_train, y_train)
# model.fit(x_train, y_train)
pred = ensemble.predict(x_test)  #make prediction on test set
error = mean_absolute_error(y_test, pred)  #calculate err
r2 = r2_score(y_test, pred)
print('MAE: ', error)
print('R2: ', r2)
error_RMSE = math.sqrt(mean_squared_error(y_test, pred))  #calculate err
print('RMSE value  is:', error_RMSE)