Example #1
    def reg_fit_predict(self,
                        x_train,
                        x_test,
                        y_train,
                        y_test,
                        est_name,
                        report_flg=True):
        if est_name == 'vote':
            if len(self.regression_estimators) > 1:
                print(self.regression_estimators.items())
                # VotingRegressor expects a list of (name, estimator)
                # tuples, not a dict view
                model = VotingRegressor(
                    estimators=list(self.regression_estimators.items()))
            else:
                print('Caution: voting needs at least two fitted models')
                return
        else:
            model = self.base_regression_estimators[est_name]

        model.fit(x_train, y_train)

        # predict test data
        y_pred = model.predict(x_test)

        # report scores
        if report_flg:
            self.reg_score_report(y_test, y_pred)

        # add model to dict
        self.regression_estimators[est_name] = model
        return
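VotingRegressor accepts any sequence of (name, estimator) pairs, so a dict of named models converts directly with list(d.items()). A minimal, self-contained sketch of the same fit-then-vote pattern; the dataset and estimator choices here are illustrative, not taken from the class above:

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression

x_train, y_train = make_regression(n_samples=200, n_features=5, random_state=0)

# named estimators collected in a dict, mirroring self.regression_estimators
regression_estimators = {
    'lin': LinearRegression(),
    'rf': RandomForestRegressor(n_estimators=50, random_state=0),
}
vote = VotingRegressor(estimators=list(regression_estimators.items()))
vote.fit(x_train, y_train)
print(vote.predict(x_train[:3]))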
Example #2
def full_train():
    """
    Function to train the model on all of the available data. The trained model
    is saved as a pickle file.

    Returns
    -------
    Nothing is directly returned. The function saves the model in a pickle file
    for later usage in predictions.

    """
    # call df_prep_split once rather than twice
    prep = df_prep_split()
    X, y = prep[0], prep[1]
    ridge_reg = Ridge()
    forest_reg = RandomForestRegressor(
        max_features=8, n_estimators=100, n_jobs=-1
    )  # downscaled n_estimators from 500 due to memory issues on server
    boost_reg = GradientBoostingRegressor()
    ensemble_reg = VotingRegressor(estimators=[("ridge", ridge_reg),
                                               ("RF", forest_reg),
                                               ("GB", boost_reg)],
                                   n_jobs=-1)
    ensemble_reg.fit(X, y)
    PATH = os.environ.get(
        'HOME') + "/app/model.pickle"  # CHANGE PATH TO SERVER DIR
    with open(PATH, "wb") as model_file:
        pickle.dump(ensemble_reg, model_file)
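For the later predictions the docstring mentions, the pickled ensemble can be reloaded. A minimal sketch, assuming the same PATH convention; X_new is a hypothetical array with the training feature layout:

import os
import pickle

PATH = os.environ.get('HOME') + "/app/model.pickle"
with open(PATH, "rb") as model_file:
    model = pickle.load(model_file)
# X_new is assumed to be a 2-D array with the same columns as X above
# y_hat = model.predict(X_new)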
Example #3
def train(features: List[str]):
    in_cols = [
        "climate_vs",
        "climate_def",
        "climate_vap",
        "climate_aet",
        "precipitation",
        "landcover_5",
    ]
    target_col = "burn_area"
    date_split = "2013-01-01"
    train_all = get_training_dataset()
    train_ = train_all.loc[train_all.date < date_split]
    valid_ = train_all.loc[train_all.date >= date_split]  # keep the split date in validation

    X_train, y_train = train_[in_cols], train_[target_col]
    X_valid, y_valid = valid_[in_cols], valid_[target_col]

    xgb_model = xgb.XGBRegressor(n_estimators=300, max_depth=3,
                                 colsample_bytree=0.5,
                                 objective='reg:squarederror')

    xgb_model.fit(X_train, y_train)

    # cat_model=CatBoostRegressor(iterations=300, depth=5, learning_rate=0.1, loss_function='RMSE')
    # cat_model.fit(X_train, y_train,eval_set=(X_valid, y_valid),plot=True)

    lgb_model = lgb.LGBMRegressor(n_estimators=100, max_depth=8, num_leaves=6, objective="regression")
    lgb_model.fit(X_train, y_train)

    # voting_regressor = VotingRegressor([('xgb', xgb_model), ('cat', cat_model), ('lgb', lgb_model)])
    voting_regressor = VotingRegressor([('xgb', xgb_model), ('lgb', lgb_model)])
    voting_regressor.fit(X_train, y_train)

    return voting_regressor
Example #4
def model_fit_save(train_x, train_y, test_x, test_y):

    ## Training the model
    r1 = LinearRegression()
    #r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    r3 = SVR(kernel='rbf')

    er = VotingRegressor([
        ('lr', r1),
        #('rf', r2),
        ('svr_rbf', r3)
    ])

    er.fit(train_x, train_y)

    ### Evaluating on the test data
    y_pred = er.predict(test_x)
    print('Mean Absolute Error:', mean_absolute_error(test_y, y_pred))
    print('Mean Squared Error:', mean_squared_error(test_y, y_pred))
    print('Root Mean Squared Error:',
          np.sqrt(mean_squared_error(test_y, y_pred)))

    ## Saving the model
    # Save the model as a pickle in a file
    joblib.dump(er, 'model.pkl')
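The joblib dump has a matching load step for reuse. A minimal sketch, assuming 'model.pkl' was written by the call above; new_x is a hypothetical input with the same columns as train_x:

import joblib

er = joblib.load('model.pkl')
# predictions = er.predict(new_x)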
Example #5
    def ensemble_lgb_regressor(self):
        try:
            root_dir = ('/Users/lujingze/Programming/SWFusion/'
                        'regression/tc/lightgbm/model/')
            model_dir = {
                'SG-FL': (f"""{root_dir}na_101.845662_fl_smogn_"""
                          f"""final_threshold_square_2/"""),
                'MSE': f'{root_dir}na_2.188733/',
            }
            er_name = ''
            estimators = []
            for idx, (name, out_dir) in enumerate(model_dir.items()):
                er_name += f'{name}_'
                save_file = [f for f in os.listdir(out_dir)
                             if f.endswith('.pkl')
                             and f.startswith(f'{self.basin}')]
                if len(save_file) != 1:
                    self.logger.error('Count of Bunch is not ONE')
                    exit(1)

                with open(f'{out_dir}{save_file[0]}', 'rb') as f:
                    best_result = pickle.load(f)

                estimators.append((name, best_result.model))

            er_name = er_name[:-1]
            er = VotingRegressor(estimators)
            er.fit(self.X_train, self.y_train)

            # build the output directory from the already-trimmed ensemble name
            er_dir = f'{root_dir}{er_name}/'
            os.makedirs(er_dir, exist_ok=True)
            y_pred = er.predict(self.X_test)
            # save predictions (predict returns an ndarray, which has no to_pickle)
            with open(f'{er_dir}y_pred.pkl', 'wb') as f:
                pickle.dump(y_pred, f)
        except Exception as msg:
            breakpoint()
            exit(msg)
Example #6
def main():
    print(__doc__)

    import matplotlib.pyplot as plt
    import numpy as np

    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.ensemble import VotingRegressor
    from sklearn.linear_model import LinearRegression

    test_paths_file = "../test_paths_1.npy"
    val_paths_file = "../val_paths_1.npy"
    # memory-mapped path arrays; loaded here but not used in this excerpt
    test_paths = np.load(test_paths_file, mmap_mode="r")
    val_paths = np.load(val_paths_file, mmap_mode="r")

    X, y = load_diabetes(return_X_y=True)

    # Train regressors
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()

    reg1.fit(X, y)
    reg2.fit(X, y)
    reg3.fit(X, y)

    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)
Example #7
def voting():
    # dtr model
    tuned_parameters = [{
        'criterion': ['squared_error', 'absolute_error'],  # 'mse'/'mae' in scikit-learn < 1.0
        'max_depth': np.arange(1, 10)
    }]
    dtr = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5)

    # rfr model
    tuned_parameters = {
        'min_samples_split': [3, 6, 9],
        'n_estimators': [10, 50, 100]
    }
    rfr = GridSearchCV(RandomForestRegressor(),
                       param_grid=tuned_parameters,
                       cv=5)

    # build voting model
    voting_reg = VotingRegressor(estimators=[('dtr_reg', dtr),
                                             ('rfr_reg', rfr)],
                                 weights=[1, 2])

    # fit the model using some training data
    voting_reg.fit(X_train, Y_train)

    # R^2 score of the ensemble on the test data
    test_score = voting_reg.score(X_test, Y_test)

    print("R^2 score for final voting = " + str(round(test_score, 4)))
Example #8
    def voting_regressor(self):
        estimators_num = 10
        regs = {
            'GBR': GradientBoostingRegressor(
                random_state=1, n_estimators=estimators_num),
            'RF': RandomForestRegressor(
                random_state=1, n_estimators=estimators_num,
                n_jobs=-1),
            'LR': LinearRegression(),
        }
        ereg_estimators = []
        ereg_name = ''
        for name, reg in regs.items():
            ereg_estimators.append((name, reg))
            ereg_name += f'{name}_'

        ereg = VotingRegressor(estimators=ereg_estimators,
                               n_jobs=-1)
        ereg.fit(self.X_train, self.y_train)
        y_pred = ereg.predict(self.X_test)

        root_dir = ('/Users/lujingze/Programming/SWFusion/'
                    'regression/tc/lightgbm/model/')
        ereg_dir = f'{root_dir}{ereg_name[:-1]}/'
        os.makedirs(ereg_dir, exist_ok=True)

        dump(ereg, f'{ereg_dir}voting_model.joblib')

        with open(f'{ereg_dir}test_pred.pkl', 'wb') as f:
            pickle.dump(y_pred, f)
Example #9
def rainfall_runoff(precip_file, delineated_file, discharge_file, plot_fname):
    # give precipitation data and delineated watershed data as input
    # inputs should be .mat only
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']

    # read discharge data as .xls input
    discharge_df = pd.ExcelFile(discharge_file)
    discharge_df = discharge_df.parse(0)
    discharge_df = discharge_df.fillna(0)  # Replace the nan values with 0's

    basin_num = 5
    reg1 = RandomForestRegressor(n_estimators=100, random_state=42)
    reg4 = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', reg4), ('rf', reg1)])

    X, y = get_data(discharge_df, basin_num, precip_mat, basin_mat_delineated,
                    False)
    voting_reg.fit(X, y)

    y_pred = voting_reg.predict(X)

    plt.scatter(y_pred, y_pred - y, c='r')
    plt.title("Runoff prediction data using a voting-regressor")
    plt.xlabel("Predicted Output")
    plt.ylabel("Error in prediction")
    print(plot_fname)
    plt.savefig(plot_fname)
Example #10
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)], weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]), axis=0,
                     weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)], weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
Example #11
    def train(self):
        self.gripperjack = self.gripperjack[0]
        self.location = self.location[0]
        generator = pg.generator_factory(self.type)

        self.df: pd.DataFrame = generator.generate(self.gripperjack,
                                                   self.location, 1)
        print(self.df.columns)
        self.df = self.df.drop(columns=['Timestamp']).dropna()

        print('DATAFRAME IS LOADED IN')

        y = self.df.pop('next')
        x = self.df

        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            shuffle=True)

        r = [('K Neighbour Regressor',
              KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
             ('Random Forest Regressor',
              RandomForestRegressor(n_estimators=200, n_jobs=5)),
             ('Ada Regressor',
              AdaBoostRegressor(n_estimators=100, learning_rate=0.1))]

        regressor = VotingRegressor(r, weights=[0.1, 1, 0.1])

        regressor.fit(x_train, y_train)
        print('===================')
        print('SCORE X/Y TEST')
        print(regressor.score(x_test, y_test))
        dump_location = 'Recources\\regressor_dumps\\' + self.type + '\\' + str(
            self.gripperjack) + '\\' + self.location

        print('==================')
        print('ACCURACY')
        y_pred = regressor.predict(x_test)
        mae = metrics.mean_absolute_error(y_test, y_pred)
        # note: this is MAE normalised by the target range, not a true MAPE
        mape = (mae / (y.max() - y.min())) * 100
        print('MAE')
        print(mae)
        print('MAPE')
        print(mape)

        if not os.path.exists(dump_location):
            os.makedirs(dump_location)
        pickle.dump(regressor, open(dump_location + '\\regressor.sav', 'wb'))
        return mape
Example #12
def voting_predictions(data, base_models, val=True):

    data = data_copy(data)
    Xtrain, Xtest, y = data

    vote_params = []
    for index, (model_cls, params) in enumerate(base_models):
        # instantiate each model class with its parameter dict
        name = 'model' + str(index)
        vote_params.append((name, model_cls(**params)))

    votemodel = VotingRegressor(vote_params)
    votemodel.fit(Xtrain, y)
    y_pred = votemodel.predict(Xtest)
    # the target was log-transformed, so invert the transform once only
    y_pred = np.exp(y_pred)

    if val:
        k_fold_crossval(data, model=votemodel)

    return y_pred
Example #13
    def voting_compile_fit(self):

        # This function fits the VotingRegressor and selects the best run

        prev_mse = 0
        i = 0

        # Fit the VotingRegressor n_repetition times and keep the one with lowest MSE
        while i < self.n_repetition:

            if i == 0:
                self.voting_reg = VotingRegressor(estimators=self.reg_models)
                self.voting_reg.fit(self.X_train, self.y_train.values.ravel())
                y_pred = self.voting_reg.predict(self.X_test)
                prev_mse = mean_squared_error(self.y_test, y_pred)

                print(i + 1, ". ", "Voting_reg", prev_mse / 1000000)

            else:
                current_reg = VotingRegressor(estimators=self.reg_models)
                current_reg.fit(self.X_train, self.y_train.values.ravel())
                y_pred = current_reg.predict(self.X_test)
                mse = mean_squared_error(self.y_test, y_pred)

                print(i + 1, ". ", "Voting_reg", mse / 1000000)

                if mse < prev_mse:
                    self.voting_reg = current_reg
                    prev_mse = mse

            i += 1
Example #15
def make_voting_regressor(y, x_vars):
    estimator_list = [("mlp", MLPRegressor(random_state=1, max_iter=250)),
                      ("random_forest", RandomForestRegressor(n_jobs=1)),
                      ("nearest_neighbor", KNeighborsRegressor(n_neighbors=4)),
                      ("decision_tree", DecisionTreeRegressor(random_state=0)),
                      ("gradient_boost",
                       GradientBoostingRegressor(random_state=0))]
    ereg = VotingRegressor(estimators=estimator_list)
    ereg.fit(x_vars, y)
    return ereg, {}
Example #16
    def run_ensemble_run(self, model_name='Ensemble'):
        reg1 = SVR(C=10, kernel="rbf", epsilon=0.1, gamma='auto')
        reg2 = KNeighborsRegressor(n_neighbors=11)
        reg3 = RandomForestRegressor(n_estimators=100)

        # combine all three regressors into the voting ensemble
        model = VotingRegressor([('SVR', reg1), ('KNN', reg2), ('RF', reg3)])
        model.fit(self.X_train, self.Y_train)

        self.evaluate_regression(self.Y_train, model.predict(self.X_train),
                                 self.dates_train, model_name + '-OnTrain',
                                 slicer=1)
        self.evaluate_regression(self.Y_test, model.predict(self.X_test),
                                 self.dates_test, model_name + '-OnTest',
                                 slicer=1)
Example #17
def train_voting_regressor(algos):

    vr = VotingRegressor(algos)
    vr.fit(X_train, y_train)

    y_pred = vr.predict(X_test1)

    r2 = r2_score(y_test1, y_pred)
    mae = mean_absolute_error(y_test1, y_pred)

    return vr, r2, mae
Example #18
def get_training_goals(X, y, X_test):
    # ensemble learning
    voting_reg = VotingRegressor(estimators=[
        ('rf_poly', forest_polynomialregression(degree=3)),
        ('gb_poly', gb_polynomialregression(degree=3)),
        ('ls_poly', ls_polynomialregression(degree=3)),
        # ('rf_reg', RandomForestRegressor(n_estimators=100, oob_score=True, random_state=500)),
        # ('gb_reg', GradientBoostingRegressor(loss='ls', max_depth=3, max_leaf_nodes=10, min_samples_leaf=1, n_estimators=200, random_state=100)),
        # ('ls_reg', LassoCV(eps=1e-3, cv=4, max_iter=5000, random_state=100))
    ], weights=[0.2, 0.6, 0.2])
    voting_reg.fit(X, y)
    predict_y = voting_reg.predict(X_test)
    return predict_y
Example #19
    def ensemble_of_best_params_xgb_reg(self, fn_name, space, algo, max_evals):
        best_params = self.params_to_ensemble(fn_name, space, algo, max_evals)

        models_to_voting = {}
        for i, params in enumerate(best_params):
            models_to_voting[str(i)] = xgb.XGBRegressor(**params)

        model_ensemble = VotingRegressor(list(models_to_voting.items()))
        model_ensemble.fit(self.data, self.labels)

        return model_ensemble, best_params
Example #20
    def vote(self, model_path=None, dataset_number=1):
        # Trained regressors
        reg1 = load(r'sklearn_models7/HGBR1_DS{0}.joblib'.format(dataset_number))
        reg2 = load(r'sklearn_models7/RFR1_DS{0}.joblib'.format(dataset_number))
        reg3 = load(r'sklearn_models7/MLPR1_DS{0}.joblib'.format(dataset_number))
        # reg4 = load(r'sklearn_models7/SGDR1_DS1.joblib')
        ereg = VotingRegressor(
            [('HGBR1_DS{0}'.format(dataset_number), reg1),
             ('RFR1_DS{0}'.format(dataset_number), reg2),
             ('MLPR1_DS{0}'.format(dataset_number), reg3)],
            weights=[3. / 6., 2. / 6., 1. / 6.]
        )
        ereg.fit(self.X_train, self.y_train)
        dump(ereg, model_path)
        self.evaluate_model(model=ereg, model_path=model_path)
Example #21
def steam_voting_predict_learned(data):
    """
    Runs the voting model with the values to predict already being in the model.
    """
    pre_learned_train = data[["positive_ratings_", "negative_ratings_", "owners_", "average_playtime_", "median_playtime_"]]
    pre_learned_label = data[["price_"]]

    gradient_boosting_model = GradientBoostingRegressor(random_state=1, n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1, n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(estimators=[('gb', gradient_boosting_model), ('rf', random_forest_model), ('lr', linear_regression_model)])

    voting_model.fit(pre_learned_train, pre_learned_label.values.ravel())
    preds = voting_model.predict(pre_learned_train)
    mse = mean_squared_error(pre_learned_label, preds)
    return np.mean(mse)
Example #22
def trail_main():
    n_folds = 10
    train_path = 'data/assign3_students_train.txt'
    test_path = 'data/assign3_students_test.txt'
    train_data = read_process_data(train_path)
    test_data = read_process_data(test_path)
    models_dict = get_models()
    scores_dict = {}
    learned_models_dict = {}
    for df_key, df_val in train_data.items():
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)
        voting_list = []
        for model_key, model_val in models_dict.items():
            model = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = model
            voting_list.append((name, model))
            #         print(f"{name}, Train MSE ", mean_squared_error(y_train, model.predict(X_train)))
            #         print(f"{name}, Train RScore ", r2_score(y_train, model.predict(X_train)))
            #         print(f"{name}, Test RScore ", r2_score(y_test, model.predict(X_test)))
            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ",
                  mean_squared_error(y_test, model.predict(X_test)))
            print(f"{name}, Test Score", model.score(X_test, y_test))
            print('=' * 75, '\n')
        model = VotingRegressor(voting_list)
        model = model.fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ",
              mean_squared_error(y_test, model.predict(X_test)))
        print(f"{df_key}, Voting Test Score", model.score(X_test, y_test))
        print('=' * 75, '\n\n')
Example #23
def test_get_features_names_out_regressor():
    """Check get_feature_names_out output for regressor."""

    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    voting = VotingRegressor(estimators=[
        ("lr", LinearRegression()),
        ("tree", DecisionTreeRegressor(random_state=0)),
        ("ignore", "drop"),
    ])
    voting.fit(X, y)

    names_out = voting.get_feature_names_out()
    expected_names = ["votingregressor_lr", "votingregressor_tree"]
    assert_array_equal(names_out, expected_names)
Example #24
def run_regressors():
    pyplot.plot(y_test, label='Actual')

    pyplot.legend()
    pyplot.xlabel('Time')
    pyplot.ylabel('USD/TRY')
    pyplot.show()

    # Voting Regressor
    reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
    reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
    reg3 = LinearRegression()
    model = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
    model = model.fit(normalized_train_x, numpy.ravel(y_train))
    train_predict, test_predict = make_predictions(model, normalized_train_x, normalized_test_x)
    score_regressions('Voting Regressor', y_train, train_predict, y_test, test_predict)
    # score_classifications('Voting Regressor', y_train, train_predict, y_test, test_predict)
    # plot_graph(y_test, test_predict, 'Voting Regressor')

    voting = test_predict

    xgb = execute_model('Extreme Gradient Boost Regressor', {}, XGBRegressor)
    linearRegression = execute_model('Linear Regression Regressor', linearRegressionParameters, LinearRegression)
    ridge = execute_model('Ridge Regressor', ridgeParameters, Ridge)
    bayesianRidge = execute_model('Bayesian Ridge Regressor', bayesianRidgeParameters, BayesianRidge)
    lasso = execute_model('Lasso Regressor', lassoParameters, Lasso)
    lassoLars = execute_model('Lasso Lars Regressor', lassoLarsParameters, LassoLars)
    tweedie = execute_model('Tweedie Regressor', tweedieParameters, TweedieRegressor)
    svr = execute_model('SVR Regressor', svrParameters, SVR)
    sgd = execute_model('SGD Regressor', sgdParameters, SGDRegressor)
    kNeighbors = execute_model('K Neighbors Regressor', kNeighborsParameters, KNeighborsRegressor)
    gaussian = execute_model('Gaussian Process Regressor', gaussianProcessorParameters, GaussianProcessRegressor)
    mlp = execute_model('MLP Regressor ( FeedForward ANN )', mlpParameters, MLPRegressor)
Example #25
def vote_prediction(X_train, X_test, y_train, y_test, alpha, l1_ratio,
                    n_estimators, max_depth, c, gamma):
    # def vote_prediction(X_train, X_test, y_train, y_test, forest, svr):

    print("******************* VOTING ******************", end="\n\n")

    # forest = RandomForestRegressor(n_estimators=242, max_depth=5)
    # elastic_net = ElasticNet(alpha=0.141, l1_ratio=1.0)
    forest = RandomForestRegressor(n_estimators=n_estimators,
                                   max_depth=max_depth)
    # elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    # linear_regressor = LinearRegression()
    svr = SVR(kernel='rbf', C=c, gamma=gamma)

    voting_regressor = VotingRegressor(estimators=[
        ('rf', forest),
        # ('enet', elastic_net),
        # ('lr', linear_regressor),
        ('svr', svr)
    ])
    voting_regressor = voting_regressor.fit(X_train, y_train)

    y_pred = voting_regressor.predict(X_test)

    evaluate('Voting', y_test, y_pred, write_predictions=True)

    print("\n*********************************************", end="\n\n")
Example #26
    def voting_regressor_ensemble_3(self):
        lr, lr_pred = self.linear_regr()
        rf, rf_pred = self.random_forest_regr()
        er = VotingRegressor([
            ('lr', lr),
            ('rf', rf),
        ], n_jobs=-1)
        return er.fit(self.x_train, self.y_train).predict(self.x_test)
Example #27
def fit_best_model(
    feature_mapping=features_mapping_dict, best_params=best_params, save_to_disk=True
):
    # load data
    data = np.load("linear_regression_competition.train.npz")
    features, targets = data["data"], data["target"]

    # define models
    estimator_svr = Pipeline(
        steps=[
            ("preprocessing", preprocessing(features_mapping_dict)),
            ("estimator", SVR(**best_params["svr"])),
        ]
    )
    estimator_rf = Pipeline(
        steps=[
            ("preprocessing", preprocessing(features_mapping_dict)),
            ("estimator", RandomForestRegressor(**best_params["rf"])),
        ]
    )
    estimator_gb = Pipeline(
        steps=[
            ("preprocessing", preprocessing(features_mapping_dict)),
            ("estimator", GradientBoostingRegressor(**best_params["gb"])),
        ]
    )
    estimator_elastic_net = Pipeline(
        steps=[
            ("preprocessing", preprocessing(features_mapping_dict)),
            ("estimator", ElasticNet(**best_params["elastic_net"])),
        ]
    )

    voter = VotingRegressor(
        estimators=[
            ("gb", estimator_gb),
            ("rf", estimator_rf),
            ("lr", estimator_elastic_net),
            ("svr", estimator_svr),
        ]
    )

    voter.fit(features, targets)

    # only persist the model when requested
    if save_to_disk:
        with open("linear_regression_competition.model", "wb") as model_file:
            pickle.dump(voter, model_file)
    return voter
Example #28
def steam_best_model_test(data):
    """
    Fits the best model with 90% of our data then predicts on the remaining 10%.
    This simulates a "Real world situation"
    """
    best_train = data[["positive_ratings_", "negative_ratings_", "owners_", "average_playtime_", "median_playtime_"]]
    best_label = data[["price_"]]
    X_train, X_test, y_train, y_test = train_test_split(best_train, best_label, test_size=0.1, random_state=2)

    gradient_boosting_model = GradientBoostingRegressor(random_state=1, n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1, n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(estimators=[('gb', gradient_boosting_model), ('rf', random_forest_model), ('lr', linear_regression_model)])

    voting_model.fit(X_train, y_train.values.ravel())
    preds = voting_model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    return np.mean(mse)
Example #29
    def voting_regressor_ensemble_1(self):
        lr, lr_pred = self.linear_regr()
        lasso, lasso_pred = self.lasso_regr()
        rf, rf_pred = self.random_forest_regr()
        er = VotingRegressor([
            ('lr', lr),
            ('lasso', lasso),
            ('rf', rf)
        ], n_jobs=-1)
        return er.fit(self.x_train, self.y_train).predict(self.x_test)
Example #30
def get_flow(precip_file, delineated_file, discharge_file, D, T, file_name_b4_reg, file_name_after_reg):

    # give precipitation data and delineated watershed data as input
    # inputs should be .mat only
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    print(basin_mat_delineated.shape)

    # read discharge data as .xls input
    discharge_df = pd.ExcelFile(discharge_file)
    discharge_df = discharge_df.parse(0)
    discharge_df = discharge_df.fillna(0) # Replace the nan values with 0's

    all_datetimes = discharge_df['Date']
    all_years = list(map(lambda datetime_obj: int(datetime_obj.date().strftime("%Y")), all_datetimes))
    years_list = list(set(all_years))
    
    discharge_df["Year"] = all_years

    # num days is D and num_years is T in the DQT format
    # D,T are USER INPUTS
    num_days = int(D)
    num_years = int(T)

    gather_dqt_plot(0, discharge_df, years_list, num_days, num_years, file_name_b4_reg)

    basin_num = 5
    reg1 = RandomForestRegressor(n_estimators=100, random_state=42)
    reg4 = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', reg4), ('rf', reg1)])

    X, y = get_data(discharge_df, basin_num, precip_mat, basin_mat_delineated, False)
    voting_reg.fit(X, y)

    new_discharge_df = deepcopy(discharge_df)
    new_discharge_df = new_discharge_df[(new_discharge_df["Year"] >= years_list[0]) & (new_discharge_df["Year"] <= years_list[-1])]
    print(len(discharge_df['Year']), len(new_discharge_df["Year"]))

    X, y = get_data(new_discharge_df, basin_num, precip_mat, basin_mat_delineated, True)
    y_pred = voting_reg.predict(X)
    new_discharge_df["New_Discharge"] = y_pred

    gather_dqt_plot(1, new_discharge_df, years_list, num_days, num_years, file_name_after_reg)
Example #31
    def test_onnxt_iris_voting_regressor(self):
        iris = load_iris()
        X, y = iris.data, iris.target
        y = y.astype(numpy.float32)
        X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
        clr = VotingRegressor(estimators=[
            ('lr', LinearRegression()),
            ('dt', DecisionTreeRegressor(max_depth=2))])
        clr.fit(X_train, y_train)
        X_test = X_test.astype(numpy.float32)
        X_test = numpy.vstack([X_test[:4], X_test[-4:]])
        res0 = clr.predict(X_test).astype(numpy.float32)

        model_def = to_onnx(clr, X_train.astype(numpy.float32))

        oinf = OnnxInference(model_def, runtime='python')
        res1 = oinf.run({'X': X_test})
        regs = DataFrame(res1['variable']).values
        self.assertEqualArray(res0, regs.ravel(), decimal=6)