def voting_compile_fit(self):
        """Fit ``self.n_repetition`` VotingRegressor candidates and keep
        the one with the lowest test-set MSE in ``self.voting_reg``.

        Reads: self.reg_models, self.X_train/self.y_train,
        self.X_test/self.y_test, self.n_repetition.
        Writes: self.voting_reg (best candidate so far).
        """
        # Start at +inf so the first candidate is always kept; this
        # removes the duplicated fit/predict/score code the old
        # ``i == 0`` special case carried.
        best_mse = float('inf')

        for i in range(self.n_repetition):
            candidate = VotingRegressor(estimators=self.reg_models)
            candidate.fit(self.X_train, self.y_train.values.ravel())
            y_pred = candidate.predict(self.X_test)
            mse = mean_squared_error(self.y_test, y_pred)

            # Report MSE in millions, as before.
            print(i + 1, ". ", "Voting_reg", mse / 1000000)

            if mse < best_mse:
                self.voting_reg = candidate
                best_mse = mse
コード例 #2
0
ファイル: test_voting.py プロジェクト: zpf2012/scikit-learn
def test_weights_regressor():
    """Weighted ensemble prediction equals the weighted mean of members."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    named = [('mean', reg1), ('median', reg2), ('quantile', reg3)]
    ereg = VotingRegressor(named, weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=.25)

    member_preds = [m.fit(X_r_train, y_r_train).predict(X_r_test)
                    for m in (reg1, reg2, reg3)]
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    # The ensemble output must match the weighted average of the members.
    avg = np.average(np.asarray(member_preds), axis=0, weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    # weights=None must behave exactly like uniform weights.
    ereg_weights_none = VotingRegressor(named, weights=None)
    ereg_weights_equal = VotingRegressor(named, weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
コード例 #3
0
def create_model(tp='rf', rand=0):
    """Initialize a machine-learning model
    Parameters:
        tp - machine-learning approach: 'rf', 'gb', 'nn', 'voting', 'tree' or 'lm'
        rand - random seed forwarded to the sklearn estimators
    Returns a sklearn model wrapped in dc.models.SklearnModel,
    or [] when tp is not recognized.
    """
    # Dispatch table: one factory per supported approach.
    factories = {
        'rf': lambda: RandomForestRegressor(random_state=rand,
                                            n_estimators=500),
        'lm': lambda: LinearRegression(),
        'tree': lambda: DecisionTreeRegressor(random_state=rand,
                                              max_depth=10),
        'gb': lambda: GradientBoostingRegressor(random_state=rand,
                                                n_estimators=500),
        'nn': lambda: MLPRegressor(random_state=rand,
                                   max_iter=500,
                                   hidden_layer_sizes=(500, )),
        'voting': lambda: VotingRegressor(estimators=[
            ('gb', GradientBoostingRegressor(random_state=rand,
                                             n_estimators=500)),
            ('rf', RandomForestRegressor(random_state=rand,
                                         n_estimators=500)),
            ('tree', DecisionTreeRegressor(random_state=rand,
                                           max_depth=10))]),
    }

    if tp not in factories:
        print('Wrong model type!!!')
        return []

    sklearn_model = factories[tp]()
    # The neural net is wrapped without DeepChem sample weights.
    if tp == 'nn':
        return dc.models.SklearnModel(sklearn_model, use_weights=False)
    return dc.models.SklearnModel(sklearn_model)
コード例 #4
0
    def reg_fit_predict(self,
                        x_train,
                        x_test,
                        y_train,
                        y_test,
                        est_name,
                        report_flg=True):
        """Fit the estimator named ``est_name``, predict on the test split,
        optionally report scores, and register the fitted model.

        ``est_name == 'vote'`` builds a VotingRegressor from every model
        already registered in ``self.regression_estimators``; any other
        name is looked up in ``self.base_regression_estimators``.
        """
        if est_name == 'vote':
            # Need at least two previously fitted models to vote.
            if len(self.regression_estimators) > 1:
                print(self.regression_estimators.items())
                # VotingRegressor expects a list of (name, estimator)
                # tuples; a dict view is not a list and can break
                # estimator cloning / set_params.
                model = VotingRegressor(
                    estimators=list(self.regression_estimators.items()))
            else:
                print('Caution: No models')
                return
        else:
            model = self.base_regression_estimators[est_name]

        model.fit(x_train, y_train)

        # predict test data
        y_pred = model.predict(x_test)

        # report scores
        if report_flg:
            self.reg_score_report(y_test, y_pred)

        # add model to dict (note: a fitted 'vote' ensemble joins the
        # pool and would be nested inside later ensembles)
        self.regression_estimators[est_name] = model
        return
コード例 #5
0
def rainfall_runoff(precip_file, delineated_file, discharge_file, plot_fname):
    """Fit a bagging+forest voting ensemble on basin rainfall/runoff data
    and save a residual scatter plot to ``plot_fname``.

    ``precip_file`` and ``delineated_file`` must be .mat files; the
    discharge data is read from an .xls workbook (first sheet).
    """
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']

    # First sheet of the discharge workbook, NaNs replaced with zeros.
    discharge_df = pd.ExcelFile(discharge_file).parse(0).fillna(0)

    basin_num = 5
    ensemble = VotingRegressor([
        ('br', BaggingRegressor(n_estimators=100, random_state=50)),
        ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ])

    X, y = get_data(discharge_df, basin_num, precip_mat,
                    basin_mat_delineated, False)
    ensemble.fit(X, y)
    predicted = ensemble.predict(X)

    # Residuals (prediction error) vs predicted value.
    plt.scatter(predicted, predicted - y, c='r')
    plt.title("Runoff prediction data using a voting-regressor")
    plt.xlabel("Predicted Output")
    plt.ylabel("Error in prediction")
    print(plot_fname)
    plt.savefig(plot_fname)
コード例 #6
0
def voting():
    """Grid-search a decision tree and a random forest, combine them in a
    weighted VotingRegressor and print the test-set score."""
    # Decision-tree member, tuned over split criterion and depth.
    dtr = GridSearchCV(
        DecisionTreeRegressor(),
        [{'criterion': ['mse', 'mae'], 'max_depth': np.arange(1, 10)}],
        cv=5)

    # Random-forest member.
    rfr = GridSearchCV(
        RandomForestRegressor(),
        param_grid={'min_samples_split': [3, 6, 9],
                    'n_estimators': [10, 50, 100]},
        cv=5)

    # Ensemble: the forest votes with double the weight of the tree.
    voting_reg = VotingRegressor(
        estimators=[('dtr_reg', dtr), ('rfr_reg', rfr)],
        weights=[1, 2])
    voting_reg.fit(X_train, Y_train)

    # R^2 score on the held-out data.
    test_score = voting_reg.score(X_test, Y_test)
    print("Accuracy score for final voting= " + str(round(test_score, 4)))
コード例 #7
0
def voting_test():
    """Grid-search an SVM/extra-trees voting ensemble over the SVM
    hyperparameters and pickle the fitted search object."""
    x_train, x_holdout, y_train, y_holdout = create_holdout_data(
        ratio=.10, seed=13, targets='distress_TQ')

    members = [
        ('svm', SVR(kernel='rbf')),
        ('etree', ExtraTreesRegressor(1000, 'mae', random_state=13)),
    ]
    # Only the SVM members are searched; the tree settings are fixed.
    search_space = {
        'svm__C': (1, 10, 100, 100),
        'svm__gamma': (1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1),
    }

    grid = GridSearchCV(estimator=VotingRegressor(estimators=members),
                        param_grid=search_space, cv=3, verbose=2)
    grid.fit(x_train, y_train)
    print(grid.best_params_)

    gridfile = './data/distress_TQ_VotingRegressor_GridSearchCV.pkl'
    with open(gridfile, 'wb') as file:
        pkl.dump(grid, file)
コード例 #8
0
def trail_main():
    """For each training dataframe: fit every base model, report per-model
    test metrics, then fit and evaluate a VotingRegressor over them all."""
    n_folds = 10
    train_data = read_process_data('data/assign3_students_train.txt')
    test_data = read_process_data('data/assign3_students_test.txt')
    models_dict = get_models()
    scores_dict = {}
    learned_models_dict = {}

    for df_key, df_val in train_data.items():
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)

        voting_list = []
        for model_key, model_val in models_dict.items():
            fitted = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = fitted
            voting_list.append((name, fitted))

            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ",
                  mean_squared_error(y_test, fitted.predict(X_test)))
            print(f"{name}, Test Score", fitted.score(X_test, y_test))
            print('=' * 75, '\n')

        # Ensemble over every fitted base model for this dataframe.
        ensemble = VotingRegressor(voting_list).fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ",
              mean_squared_error(y_test, ensemble.predict(X_test)))
        print(f"{df_key}, Voting Test Score", ensemble.score(X_test, y_test))
        print('=' * 75, '\n\n')
コード例 #9
0
def model_fit_save(train_x, train_y, test_x, test_y):
    """Train a linear-regression + SVR voting ensemble, print error
    metrics on the test split and persist the model to 'model.pkl'."""
    # Ensemble members (a random forest member is currently disabled).
    members = [
        ('lr', LinearRegression()),
        ('svr_rbf', SVR(kernel='rbf')),
    ]
    ensemble = VotingRegressor(members)
    ensemble.fit(train_x, train_y)

    # Evaluate on the held-out data.
    y_pred = ensemble.predict(test_x)
    print('Mean Absolute Error:', mean_absolute_error(test_y, y_pred))
    print('Mean Squared Error:', mean_squared_error(test_y, y_pred))
    print('Root Mean Squared Error:',
          np.sqrt(mean_squared_error(test_y, y_pred)))

    # Persist the fitted ensemble for later predictions.
    joblib.dump(ensemble, 'model.pkl')
コード例 #10
0
ファイル: constructors.py プロジェクト: sahahn/BPt
    def _get_base_ensembler(self, models):
        """Wrap *models* in a Voting ensemble matching the problem type.

        Every member (and any nested sub-estimator it exposes via an
        ``estimators`` attribute) is pinned to a single job so that
        parallelism is controlled only at the ensemble level.
        """
        # @TODO Might want to reflect choice of ensemble / model n_jobs here?
        for model in models:
            member = model[1]
            try:
                member.n_jobs = 1
            except AttributeError:
                pass

            # Ensemble-of-ensembles: pin the inner estimators as well.
            for sub_estimator in getattr(member, 'estimators', []):
                try:
                    sub_estimator.n_jobs = 1
                except AttributeError:
                    pass

        if self.spec['problem_type'] == 'regression':
            return VotingRegressor(models, n_jobs=self.spec['n_jobs'])
        return VotingClassifier(models, voting='soft',
                                n_jobs=self.spec['n_jobs'])
コード例 #11
0
def generate_ensemble_regressor(models_to_combine, X_train, y_train, X_test,
                                y_test):
    """Cross-validate a voting ensemble over *models_to_combine* and return
    the fold estimator with the best r2, printing CV and test-set scores."""
    print(' - Generating ensemble model')

    ensemble = VotingRegressor(estimators=models_to_combine)
    results = cross_validate(ensemble,
                             X_train,
                             y_train,
                             cv=KFold(n_splits=5),
                             return_estimator=True,
                             scoring='r2')

    # Keep the fold estimator with the highest validation score.
    best_model, best_scorer = None, 0
    for estimator, score in zip(results['estimator'],
                                results['test_score']):
        if best_model is None or best_scorer < score:
            best_model, best_scorer = estimator, score

    y_pred = best_model.predict(X_test)

    print('   - Cross-validation results:')
    print('     - r2:', np.max(results['test_score']))
    print("   - Test set results:")
    print("     - r2:", metrics.r2_score(y_test, y_pred))

    return best_model
コード例 #12
0
ファイル: NewTrainer.py プロジェクト: dirk4/StageKremer
    def train(self):
        """Generate this gripperjack/location's dataframe, fit a weighted
        KNN / random-forest / AdaBoost voting ensemble on it, pickle the
        fitted regressor to disk and return its error percentage.

        Returns:
            The MAE normalised by the target range, as a percentage
            (printed as 'MAPE' below).
        """
        # Only the first configured gripperjack and location are trained.
        self.gripperjack = self.gripperjack[0]
        self.location = self.location[0]
        generator = pg.generator_factory(self.type)

        self.df: pd.DataFrame = generator.generate(self.gripperjack,
                                                   self.location, 1)
        print(self.df.columns)
        # Timestamps are not features; drop them plus incomplete rows.
        self.df = self.df.drop(columns=['Timestamp']).dropna()

        print('DATAFRAME IS LOADED IN')
        x = None
        x_train = None
        x_test = None

        y = None
        y_train = None
        y_test = None

        regressor = None

        # 'next' is the prediction target; the remaining columns are features.
        y = self.df.pop('next')

        x = self.df

        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            shuffle=True)

        # Ensemble members; the random forest dominates via weights below.
        r = [('K Neighbour Regressor',
              KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
             ('Random Forrest Regressor',
              RandomForestRegressor(n_estimators=200, n_jobs=5)),
             ('Ada Regressor',
              AdaBoostRegressor(n_estimators=100, learning_rate=0.1))]

        regressor = VotingRegressor(r, weights=[0.1, 1, 0.1])

        regressor.fit(x_train, y_train)
        print('===================')
        print('SCORE X/Y TEST')
        print(regressor.score(x_test, y_test))
        # Dump path encodes model type, gripperjack id and location.
        dump_location = 'Recources\\regressor_dumps\\' + self.type + '\\' + str(
            self.gripperjack) + '\\' + self.location

        print('==================')
        print('ACCURACY')
        y_pred = regressor.predict(x_test)
        mae = metrics.mean_absolute_error(y_test, y_pred)
        # MAE as a percentage of the full target range.
        mape = (mae / (y.max() - y.min())) * 100
        print('MAE')
        print(mae)
        print('MAPE')
        print(mape)

        if not os.path.exists(dump_location):
            os.makedirs(dump_location)
        pickle.dump(regressor, open(dump_location + '\\regressor.sav', 'wb'))
        return mape
コード例 #13
0
def train(features: List[str]):
    """Fit XGBoost and LightGBM burn-area regressors on pre-2013 rows and
    return them combined in a fitted VotingRegressor."""
    in_cols = [
        "climate_vs",
        "climate_def",
        "climate_vap",
        "climate_aet",
        "precipitation",
        "landcover_5",
    ]
    target_col = "burn_area"
    date_split = "2013-01-01"

    train_all = get_training_dataset()
    # Temporal split: train strictly before the cut, validate after it.
    before_split = train_all.loc[train_all.date < date_split]
    after_split = train_all.loc[train_all.date > date_split]

    X_train, y_train = before_split[in_cols], before_split[target_col]
    X_valid, y_valid = after_split[in_cols], after_split[target_col]

    xgb_model = xgb.XGBRegressor(n_estimators=300,
                                 max_depth=3,
                                 colsample_bytree=0.5,
                                 objective='reg:squarederror')
    xgb_model.fit(X_train, y_train)

    lgb_model = lgb.LGBMRegressor(n_estimators=100,
                                  max_depth=8,
                                  num_leaves=6,
                                  objective="regression")
    lgb_model.fit(X_train, y_train)

    # Combine both gradient-boosted models into one voting ensemble.
    voting_regressor = VotingRegressor([('xgb', xgb_model),
                                        ('lgb', lgb_model)])
    voting_regressor.fit(X_train, y_train)
    return voting_regressor
コード例 #14
0
def vote_prediction(X_train, X_test, y_train, y_test, alpha, l1_ratio,
                    n_estimators, max_depth, c, gamma):
    """Fit a random-forest + SVR voting ensemble and evaluate it.

    ``alpha`` and ``l1_ratio`` are accepted for interface compatibility
    (an ElasticNet member is currently disabled) but are unused here.
    """
    print("******************* VOTING ******************", end="\n\n")

    members = [
        ('rf', RandomForestRegressor(n_estimators=n_estimators,
                                     max_depth=max_depth)),
        ('svr', SVR(kernel='rbf', C=c, gamma=gamma)),
    ]
    voting_regressor = VotingRegressor(estimators=members).fit(X_train,
                                                               y_train)

    evaluate('Voting', y_test, voting_regressor.predict(X_test),
             write_predictions=True)

    print("\n*********************************************", end="\n\n")
コード例 #15
0
ファイル: regression.py プロジェクト: Neo-101/R2S
    def ensemble_lgb_regressor(self):
        """Combine previously saved LightGBM models ('SG-FL' and 'MSE')
        into a VotingRegressor, fit it and pickle the test predictions.

        Fixes (previously masked by the broad except):
        - ``er_dir`` was never defined (NameError on the dump line);
        - ``predict`` returns an ndarray, which has no ``to_pickle``;
        - ``er_name[:-1]`` was truncated a second time in ``makedirs``.
        The persistence logic now mirrors the sibling voting_regressor().
        """
        try:
            root_dir = ('/Users/lujingze/Programming/SWFusion/'
                        'regression/tc/lightgbm/model/')
            model_dir = {
                'SG-FL': (f"""{root_dir}na_101.845662_fl_smogn_"""
                          f"""final_threshold_square_2/"""),
                'MSE': f'{root_dir}na_2.188733/',
            }
            er_name = ''
            estimators = []
            for name, out_dir in model_dir.items():
                er_name += f'{name}_'
                # Exactly one saved Bunch per model directory is expected.
                save_file = [f for f in os.listdir(out_dir)
                             if f.endswith('.pkl')
                             and f.startswith(f'{self.basin}')]
                if len(save_file) != 1:
                    self.logger.error('Count of Bunch is not ONE')
                    exit(1)

                with open(f'{out_dir}{save_file[0]}', 'rb') as f:
                    best_result = pickle.load(f)

                estimators.append((name, best_result.model))

            # Strip the trailing underscore, e.g. 'SG-FL_MSE'.
            er_name = er_name[:-1]
            er = VotingRegressor(estimators)
            er.fit(self.X_train, self.y_train)

            er_dir = f'{root_dir}{er_name}/'
            os.makedirs(er_dir, exist_ok=True)
            y_pred = er.predict(self.X_test)
            with open(f'{er_dir}y_pred.pkl', 'wb') as f:
                pickle.dump(y_pred, f)
        except Exception as msg:
            breakpoint()
            exit(msg)
コード例 #16
0
ファイル: regression.py プロジェクト: Neo-101/R2S
    def voting_regressor(self):
        """Fit a GBR/RF/LR voting ensemble on the training split, save the
        model with joblib and pickle the test-set predictions."""
        estimators_num = 10
        regs = {
            'GBR': GradientBoostingRegressor(
                random_state=1, n_estimators=estimators_num),
            'RF': RandomForestRegressor(
                random_state=1, n_estimators=estimators_num,
                n_jobs=-1),
            'LR': LinearRegression(),
        }
        # Member list and output-directory name derive from the dict.
        ereg_estimators = list(regs.items())
        ereg_name = '_'.join(regs.keys())

        ereg = VotingRegressor(estimators=ereg_estimators,
                               n_jobs=-1)
        y_pred = ereg.fit(self.X_train, self.y_train).predict(self.X_test)

        root_dir = ('/Users/lujingze/Programming/SWFusion/'
                    'regression/tc/lightgbm/model/')
        ereg_dir = f'{root_dir}{ereg_name}/'
        os.makedirs(ereg_dir, exist_ok=True)

        dump(ereg, f'{ereg_dir}voting_model.joblib')

        with open(f'{ereg_dir}test_pred.pkl', 'wb') as f:
            pickle.dump(y_pred, f)
コード例 #17
0
def run_regressors():
    """Plot the actual series, run the voting ensemble, then score a
    battery of individual regressors via execute_model."""
    pyplot.plot(y_test, label='Actual')
    pyplot.legend()
    pyplot.xlabel('Time')
    pyplot.ylabel('USD/TRY')
    pyplot.show()

    # Voting ensemble: gradient boosting, random forest and OLS.
    members = [
        ('gb', GradientBoostingRegressor(random_state=1, n_estimators=10)),
        ('rf', RandomForestRegressor(random_state=1, n_estimators=10)),
        ('lr', LinearRegression()),
    ]
    model = VotingRegressor(estimators=members).fit(normalized_train_x,
                                                    numpy.ravel(y_train))
    train_predict, test_predict = make_predictions(model,
                                                   normalized_train_x,
                                                   normalized_test_x)
    score_regressions('Voting Regressor', y_train, train_predict,
                      y_test, test_predict)

    voting = test_predict

    # Individual regressors, each tuned with its own parameter grid.
    xgb = execute_model('Extreme Gradient Boost Regressor', {}, XGBRegressor)
    linearRegression = execute_model('Linear Regression Regressor',
                                     linearRegressionParameters,
                                     LinearRegression)
    ridge = execute_model('Ridge Regressor', ridgeParameters, Ridge)
    bayesianRidge = execute_model('Bayesian Ridge Regressor',
                                  bayesianRidgeParameters, BayesianRidge)
    lasso = execute_model('Lasso Regressor', lassoParameters, Lasso)
    lassoLars = execute_model('Lasso Lars Regressor', lassoLarsParameters,
                              LassoLars)
    tweedie = execute_model('Tweedie Regressor', tweedieParameters,
                            TweedieRegressor)
    svr = execute_model('SVR Regressor', svrParameters, SVR)
    sgd = execute_model('SGD Regressor', sgdParameters, SGDRegressor)
    kNeighbors = execute_model('K Neighbors Regressor',
                               kNeighborsParameters, KNeighborsRegressor)
    gaussian = execute_model('Gaussian Process Regressor',
                             gaussianProcessorParameters,
                             GaussianProcessRegressor)
    mlp = execute_model('MLP Regressor ( FeedForward ANN )', mlpParameters,
                        MLPRegressor)
コード例 #18
0
def main():
    """Fit the sklearn voting-regressor example on the diabetes dataset;
    also memory-maps the path arrays used elsewhere in the script."""
    print(__doc__)

    import matplotlib.pyplot as plt

    from sklearn.datasets import load_diabetes
    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.ensemble import VotingRegressor
    from sklearn.linear_model import LinearRegression

    # Memory-mapped so the arrays are not loaded fully into RAM.
    test_paths = np.load("../test_paths_1.npy", mmap_mode="r")
    val_paths = np.load("../val_paths_1.npy", mmap_mode="r")

    X, y = load_diabetes(return_X_y=True)

    # Fit each member first, then the ensemble over all three.
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()
    for member in (reg1, reg2, reg3):
        member.fit(X, y)

    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)
コード例 #19
0
ファイル: ML.py プロジェクト: livecitylab/brl-flask
def full_train():
    """
    Function to train the model on all of the available data. The trained model
    is saved as a pickle file.

    Returns
    -------
    Nothing is directly returned. The function saves the model in a pickle file
    for later usage in predictions.

    """
    # Call df_prep_split() exactly once: calling it twice recomputed the
    # preparation and, if the split shuffles, could pair X and y from
    # two different runs.
    prepared = df_prep_split()
    X, y = prepared[0], prepared[1]

    ridge_reg = Ridge()
    forest_reg = RandomForestRegressor(
        max_features=8, n_estimators=100, n_jobs=-1
    )  # downscaled n_estimators from 500 due to memory issues on server
    boost_reg = GradientBoostingRegressor()
    ensemble_reg = VotingRegressor(estimators=[("ridge", ridge_reg),
                                               ("RF", forest_reg),
                                               ("GB", boost_reg)],
                                   n_jobs=-1)
    ensemble_reg.fit(X, y)

    PATH = os.environ.get(
        'HOME') + "/app/model.pickle"  # CHANGE PATH TO SERVER DIR
    # Use a context manager so the file handle is not leaked.
    with open(PATH, "wb") as fh:
        return pickle.dump(ensemble_reg, fh)
コード例 #20
0
def voting_predictions(data, base_models, val=True):
    """Fit a VotingRegressor built from *base_models* and return the test
    predictions mapped back from log space.

    Parameters
    ----------
    data : train/test/target triple accepted by ``data_copy``.
    base_models : iterable where each item holds a model class at index 0
        and its constructor kwargs at index 1.
    val : when True, run k-fold cross-validation on the fitted ensemble.

    Returns
    -------
    Array of predictions on the test matrix (after a single exp()).
    """
    data = data_copy(data)
    Xtrain, Xtest, y = data

    # Instantiate each base model with its parameters under a unique name.
    vote_params = []
    for index, base_model in enumerate(base_models):
        vote_params.append(('model' + str(index),
                            base_model[0](**base_model[1])))

    votemodel = VotingRegressor(vote_params)
    votemodel.fit(Xtrain, y)
    y_pred = votemodel.predict(Xtest)
    # BUGFIX: the inverse log-transform was previously applied twice
    # (exp(exp(x))); apply it exactly once.
    y_pred = np.exp(y_pred)

    if val:
        k_fold_crossval(data, model=votemodel)

    return y_pred
コード例 #21
0
 def voting_regressor(X, y):
     """Build a VotingRegressor whose five members are pre-fitted dummy
     regressors, injected directly to bypass the normal fit() path."""
     fitted_members = []
     for _ in range(5):
         fitted_members.append(
             MyDummyRegressor(config=1, random_state=0).fit(X, y))
     vr = VotingRegressor(estimators=None)
     # Assign already-fitted estimators without calling vr.fit().
     vr.estimators_ = fitted_members
     return vr
コード例 #22
0
 def voting_regressor_ensemble_3(self):
     """Predict on the test split with a linear-regression + random-forest
     voting ensemble (fitted in parallel on all cores)."""
     lr, _ = self.linear_regr()
     rf, _ = self.random_forest_regr()
     ensemble = VotingRegressor([('lr', lr), ('rf', rf)], n_jobs=-1)
     ensemble.fit(self.x_train, self.y_train)
     return ensemble.predict(self.x_test)
コード例 #23
0
 def get_voting(self):
     """Return a voting ensemble matching ``self.case``: a VotingClassifier
     for 'classifier', otherwise a VotingRegressor."""
     if self.case != 'classifier':
         return VotingRegressor(estimators=self.models,
                                weights=self.weights)
     return VotingClassifier(estimators=self.models,
                             voting=self.voting,
                             weights=self.weights)
コード例 #24
0
 def test_voting_regression(self):
     """Names enumerate as the unnamed ensemble plus its two members."""
     model = VotingRegressor([('lr', LinearRegression()),
                              ('dt', DecisionTreeRegressor())])
     model, _ = fit_regression_model(model)
     names = list(enumerate_model_names(model))
     assert len(names) == 3
     assert [pair[0] for pair in names] == ['', 'lr', 'dt']
     for pair in names:
         assert isinstance(pair, tuple)
         assert len(pair) == 2
コード例 #25
0
def create_model_sec2():
    """Build an (unfitted) sklearn Pipeline that scores DOU section-2 acts.

    The pipeline chains: DOU column extraction -> text preprocessing ->
    keyword-weighted text vectorization plus a one-hot 'tipo_edicao'
    column -> a ridge + SVR + random-forest voting regressor.

    Returns the assembled Pipeline (caller is responsible for fitting).
    """
    
    # Preprocessing components (nothing is fitted here):
    text_col = 'tit_org_ementa_text'
    colunas_relevantes = ['tipo_edicao'] + [text_col]

    # Portuguese stopword list consumed by the text preprocessor.
    stopwords = ['de', 'a', 'o', 'que', 'e', 'é', 'do', 'da', 'em', 'um', 'para', 'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por', 'mais',
                 'as', 'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou', 'quando', 'muito', 'nos', 'já', 'eu', 'também',
                 'só', 'pelo', 'pela', 'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos', 'seus', 'quem', 'nas', 'me', 'esse',
                 'eles', 'você', 'essa', 'num', 'nem', 'suas', 'meu', 'às', 'minha', 'numa', 'pelos', 'elas', 'qual', 'nós', 'lhe', 'deles',
                 'essas', 'esses', 'pelas', 'este', 'dele', 'tu', 'te', 'vocês', 'vos', 'lhes', 'meus', 'minhas', 'teu', 'tua', 'teus',
                 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela', 'delas', 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles',
                 'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos', 'estão', 'estive', 'esteve', 'estivemos', 'estiveram', 'estava',
                 'estávamos', 'estavam', 'estivera', 'estivéramos', 'esteja', 'estejamos', 'estejam', 'estivesse', 'estivéssemos',
                 'estivessem', 'estiver', 'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve', 'houvemos', 'houveram', 'houvera',
                 'houvéramos', 'haja', 'hajamos', 'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver', 'houvermos', 'houverem', 'houverei',
                 'houverá', 'houveremos', 'houverão', 'houveria', 'houveríamos', 'houveriam', 'sou', 'somos', 'são', 'era', 'éramos', 'eram',
                 'fui', 'foi', 'fomos', 'foram', 'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos', 'fossem', 'for', 'formos',
                 'forem', 'serei', 'será', 'seremos', 'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos', 'tém', 'tinha', 'tínhamos',
                 'tinham', 'tive', 'teve', 'tivemos', 'tiveram', 'tivera', 'tivéramos', 'tenha', 'tenhamos', 'tenham', 'tivesse', 'tivéssemos',
                 'tivessem', 'tiver', 'tivermos', 'tiverem', 'terei', 'terá', 'teremos', 'terão', 'teria', 'teríamos', 'teriam']

    dou_extractor = PreprocessDOU(colunas_relevantes, ' xxnuloxx ')
    

    proc_text = PreProcessText(cargo_tokens=True, lowercase=True, remove_punctuation=True, keep_cash=True, 
                              stopwords=stopwords, stemmer=None, strip_accents=False, only_letters=False,
                              text_cols=[text_col])
    

    # Keyword lists that bias the vectorizer (fitted later with the model).
    # NOTE(review): the '0.925' below presumably records a validation
    # score achieved with this keyword set — confirm against experiments.
    keywords = ['xxdasdoisxx', 'xxdastresxx', 'xxdasumxx', 'xxfcpedoisxx', 'xxfcpetresxx', 'xxfcpeumxx', 
                'substituto','substituta', 'substituir', 'substituto eventual', 'substituta eventual'] # 0.925 
    anti_keywords = ['cargo', 'ambiente', 'comissão', 'comissionada', 'educação', 'gabinete', 'meio', 'pessoa', 
                     'Tecnologia', 'Tecnologia da', 'da Informação', 'Pessoa com', 'Igualdade', 'geral',
                     'cargo de', 'regional', 'comissão de', 'comissionada de', 'comissionado', 'comissionado de',
                     'eventual']
    # Run the (anti-)keywords through the same text preprocessing so they
    # match the tokens the vectorizer will actually see.
    keywords_df        = pd.DataFrame({text_col: keywords})
    anti_keywords_df   = pd.DataFrame({text_col: anti_keywords})
    proc_keywords      = list(proc_text.transform(keywords_df)[text_col].values)
    proc_anti_keywords = list(proc_text.transform(anti_keywords_df)[text_col].values)
    vectorizer = WeightedVectorizer(lowercase=False, binary=True, ngram_range=(1,2), max_df=0.2, min_df=1,
                                    keywords=proc_keywords, anti_keywords=proc_anti_keywords, keywords_weight=10)
    
    encoder_extra = OneHotEncoder(drop='first')
    processor     = ColumnTransformer([('vec',   vectorizer,    text_col),
                                       ('extra', encoder_extra, ['tipo_edicao'])])

    #classifier  = Ridge(20)
    classifier = VotingRegressor([('ridge', Ridge(80)), 
                                  ('svr', SVR(C=30)),
                                 ('forest', RandomForestRegressor(max_depth=6, min_samples_split=2, n_estimators=11))])

    pipeline = Pipeline([('dou', dou_extractor), ('pretext', proc_text), ('proc', processor), ('fit', classifier)])

    return pipeline
コード例 #26
0
 def voting_regressor_ensemble_1(self):
     """Predict on the test split with an OLS + lasso + random-forest
     voting ensemble (fitted in parallel on all cores)."""
     lr, _ = self.linear_regr()
     lasso, _ = self.lasso_regr()
     rf, _ = self.random_forest_regr()
     members = [('lr', lr), ('lasso', lasso), ("rf", rf)]
     fitted = VotingRegressor(members, n_jobs=-1).fit(self.x_train,
                                                      self.y_train)
     return fitted.predict(self.x_test)
コード例 #27
0
 def run_ensemble_run(self, model_name = 'Ensemble'):
     """Fit the ensemble (currently a single random forest wrapped in a
     VotingRegressor) and report regression metrics on train and test.

     NOTE(review): the original also constructed an SVR and a KNN
     regressor but never added them to the estimator list; those unused
     locals were removed. Re-add them to the list below if they were
     meant to vote.
     """
     reg3 = RandomForestRegressor(n_estimators = 100)

     model = VotingRegressor([('RF', reg3)])
     model.fit(self.X_train, self.Y_train)

     self.evaluate_regression(self.Y_train, model.predict(self.X_train), self.dates_train, model_name+'-OnTrain', slicer = 1)
     self.evaluate_regression(self.Y_test, model.predict(self.X_test), self.dates_test, model_name+'-OnTest', slicer = 1)
コード例 #28
0
def make_voting_regressor(y, x_vars):
    """Fit a five-member voting ensemble on (x_vars, y).

    Returns the fitted ensemble together with an empty metadata dict.
    """
    members = [
        ("mlp", MLPRegressor(random_state=1, max_iter=250)),
        ("random_forest", RandomForestRegressor(n_jobs=1)),
        ("nearest_neighbor", KNeighborsRegressor(n_neighbors=4)),
        ("decision_tree", DecisionTreeRegressor(random_state=0)),
        ("gradient_boost", GradientBoostingRegressor(random_state=0)),
    ]
    ensemble = VotingRegressor(estimators=members)
    ensemble.fit(x_vars, y)
    return ensemble, {}
コード例 #29
0
 def __init__(self, config, train_values, train_labels, test_values,
              logger):
     super().__init__(config, train_values, train_labels, test_values,
                      logger)
     self.model = VotingRegressor([
         ('random_forest', SVR(kernel='rbf', gamma=0.1)),
         ('krr', KernelRidge(kernel='rbf', gamma=0.1)),
         ('ada', AdaBoostRegressor()), ('rf', RandomForestRegressor()),
         ('et', ExtraTreesRegressor())
     ])
コード例 #30
0
 def create_prediction_pipeline(self) -> Pipeline:
     """Assemble feature engineering followed by an XGBoost/random-forest
     voting regressor into a single sklearn Pipeline."""
     engineered = FeatureEngineering(**self.feat_eng_parameters)
     voter = VotingRegressor([
         ("x_boost", XGBRegressor(**self.algo_hyperparams["x_boost"])),
         ("rf", RandomForestRegressor(**self.algo_hyperparams["rf"])),
     ])
     return Pipeline(steps=[
         ("feature_engineering", engineered),
         ("voting_regressor", voter),
     ])