Example 1
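These tests reference module-level fixtures (X1, X2, y, y2, w) that the listing omits. A minimal sketch of compatible fixtures, assuming 40 noisy sine samples so that the sample_weight=np.ones(40) call and the rand(8) noise below line up; the exact data in mlxtend's test suite may differ, so the hard-coded MSE assertions are only meaningful against the original fixtures:

import numpy as np
import pytest
from scipy import sparse
from numpy.testing import assert_almost_equal
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from mlxtend.regressor import StackingRegressor

# Hypothetical fixtures: a noisy sine curve with 40 samples.
np.random.seed(1)
X1 = np.sort(5 * np.random.rand(40, 1), axis=0)  # one feature
X2 = np.sort(5 * np.random.rand(40, 2), axis=0)  # two features
y = np.sin(X1).ravel()
y[::5] += 3 * (0.5 - np.random.rand(8))          # perturb every 5th sample
y2 = np.column_stack((y, y))                     # multi-output target variant
w = np.random.random(40)                         # sample weights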
def test_features_in_secondary():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    rf = RandomForestRegressor(n_estimators=10, random_state=2)
    ridge = Ridge(random_state=0)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingRegressor(regressors=[svr_lin, lr, ridge, rf],
                              meta_regressor=svr_rbf,
                              use_features_in_secondary=True)

    stack.fit(X1, y).predict(X1)
    mse = 0.14
    got = np.mean((stack.predict(X1) - y) ** 2)
    print(got)
    assert round(got, 2) == mse

    stack = StackingRegressor(regressors=[svr_lin, lr, ridge, rf],
                              meta_regressor=svr_rbf,
                              use_features_in_secondary=False)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.12
    got = np.mean((stack.predict(X1) - y) ** 2)
    print(got)
    assert round(got, 2) == mse
def test_get_coeff():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr],
                               meta_regressor=ridge)
    stregr.fit(X1, y)
    got = stregr.coef_
    expect = np.array([0.4874216, 0.45518317])
    assert_almost_equal(got, expect)
def test_get_intercept():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr],
                               meta_regressor=ridge)
    stregr.fit(X1, y)
    got = stregr.intercept_
    expect = 0.024
    assert round(got, 3) == expect
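The two accessor tests above pass because StackingRegressor delegates coef_ and intercept_ to the fitted meta-regressor. A short sketch of that equivalence, assuming the fitted meta-regressor is exposed as meta_regr_ (as in recent mlxtend versions):

stregr = StackingRegressor(regressors=[SVR(kernel='linear'), LinearRegression()],
                           meta_regressor=Ridge(random_state=1))
stregr.fit(X1, y)
# coef_/intercept_ simply forward to the meta-regressor's attributes...
assert np.allclose(stregr.coef_, stregr.meta_regr_.coef_)
assert stregr.intercept_ == stregr.meta_regr_.intercept_
# ...so a meta-regressor without them (e.g. an RBF-kernel SVR) raises
# AttributeError, which is what test_get_coeff_fail checks further down.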
def test_predict_meta_features():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[lr, ridge],
                               meta_regressor=svr_rbf)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    test_meta_features = stregr.predict(X_test)
    assert test_meta_features.shape[0] == X_test.shape[0]
def test_multivariate_class():
    lr = LinearRegression()
    ridge = Ridge(random_state=1)
    meta = LinearRegression(normalize=True)
    stregr = StackingRegressor(regressors=[lr, ridge],
                               meta_regressor=meta)
    stregr.fit(X2, y2).predict(X2)
    mse = 0.122
    got = np.mean((stregr.predict(X2) - y2) ** 2)
    assert round(got, 3) == mse
def test_train_meta_features_():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[lr, ridge],
                               meta_regressor=svr_rbf,
                               store_train_meta_features=True)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    train_meta_features = stregr.train_meta_features_
    assert train_meta_features.shape[0] == X_train.shape[0]
def test_different_models():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    stregr.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stregr.predict(X1) - y) ** 2)
    assert round(got, 2) == mse
def test_multivariate():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    stregr.fit(X2, y).predict(X2)
    mse = 0.218
    got = np.mean((stregr.predict(X2) - y) ** 2)
    assert round(got, 3) == mse
Example 9
def test_weight_unsupported_meta():
    # meta regressor with no support for
    # sample_weight should raise error
    # (note: Lasso gained sample_weight support in scikit-learn 0.23,
    # so this check targets older versions)
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=lasso)

    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
Example 10
def test_multivariate():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    stregr.fit(X2, y).predict(X2)
    mse = 0.218
    got = np.mean((stregr.predict(X2) - y)**2)
    print(got)
    assert round(got, 3) == mse
def test_multivariate_class():
    lr = LinearRegression()
    ridge = Ridge(random_state=1)
    meta = LinearRegression(normalize=True)
    stregr = StackingRegressor(regressors=[lr, ridge],
                               meta_regressor=meta)
    stregr.fit(X2, y2).predict(X2)
    mse = 0.12
    got = np.mean((stregr.predict(X2) - y2) ** 2.)
    # there seems to be an issue with the following test on Windows
    # sometimes via Appveyor
    assert round(got, 2) == mse, got
Example 12
def test_weight_ones():
    # sample weight of ones should produce equivalent outcome as no weight
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    pred1 = stregr.fit(X1, y).predict(X1)
    pred2 = stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff
Example 13
def regressionStacking(df):

    # StackingRegressor expects ndarray inputs

    X_train, X_test, y_train, y_test = trainDataSplit(df)

    randomforest_regressor = RandomForestRegressor()

    # # lightgbm's native Dataset/train API is not a scikit-learn estimator, so mlxtend cannot stack it
    # lgb_train = lightgbm.Dataset(X_train, y_train)
    # lgb_eval = lightgbm.Dataset(X_test, y_test, reference=lgb_train)
    #
    # # specify your configurations as a dict
    # params = {
    #     'task': 'train',
    #     'boosting_type': 'gbdt',
    #     'objective': 'regression',
    #     'metric': {'l2', 'auc'},
    #     'num_leaves': 2 ** 10,
    #     'learning_rate': 1.0,
    #     'feature_fraction': 0.9,
    #     'bagging_fraction': 0.8,
    #     'bagging_freq': 5,
    #     'verbose': 0
    # }
    # lightgbm_regressor = lightgbm.train(params,
    #                            lgb_train,
    #                            num_boost_round=20,
    #                            valid_sets=lgb_eval,
    #                            early_stopping_rounds=5)

    lasso_regressor = Lasso()

    dnn_regressor = MLPRegressor()

    linearRegression_regressor = LinearRegression()

    stacking_regressor = StackingRegressor(
        regressors=[randomforest_regressor, lasso_regressor, dnn_regressor],
        meta_regressor=linearRegression_regressor)

    stacking_regressor.fit(X_train, y_train)  # was fit(X_train, X_train), which trained on features as targets

    y_pred = stacking_regressor.predict(X_test)

    criterion_df, predict_result = predictResultOutput(stacking_regressor,
                                                       X_test, y_test, y_pred)

    # save model
    joblib.dump(stacking_regressor, 'stacking.model')

    return criterion_df, predict_result
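The commented-out block above is a reminder that lightgbm's native Dataset/train API is not a scikit-learn estimator, so mlxtend cannot stack it. lightgbm's scikit-learn wrapper can be stacked, though, as Example 23 below also shows; a minimal sketch, assuming lightgbm is installed:

from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
from mlxtend.regressor import StackingRegressor

# LGBMRegressor implements the fit/predict estimator API,
# so it drops straight into the regressors list.
lgbm = LGBMRegressor(num_leaves=31, learning_rate=0.1, n_estimators=100)
stack = StackingRegressor(regressors=[lgbm],
                          meta_regressor=LinearRegression())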
Example 14
def sbg_mlxtend_ensamble(iterate):
    iterate += 501
    lin_mod = linear_model.LinearRegression()
    bsn_rdg = linear_model.BayesianRidge()
    elstc_nt = ElasticNet(alpha=0.2, l1_ratio=1)
    ridge = Ridge(alpha=0.01, tol=0.1, solver='sag')
    svr_rbf = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
    sgd_reg = linear_model.SGDRegressor(penalty='l2', alpha=0.001, max_iter=1000)  # n_iter was renamed to max_iter in scikit-learn
    lasso_reg = linear_model.Lasso(alpha=1,
                                   max_iter=3000,
                                   normalize=True,  # was the string 'True'
                                   selection='random',
                                   tol=0.001)
    rndm_frst = RandomForestRegressor(max_depth=5, n_estimators=10)

    stregr = StackingRegressor(regressors=[sgd_reg, rndm_frst],
                               meta_regressor=ridge)

    X_train, X_test, y_train, y_test = train_test_split(df_X,
                                                        df_Y2,
                                                        test_size=0.20,
                                                        random_state=iterate)
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    stregr.fit(X_train, y_train)
    y_pred = stregr.predict(X_test)

    #print("Mean Squared Error: %.4f"
    #      % np.mean((y_pred - y_test.values) ** 2))
    #print('Variance Score: %.4f' % stregr.score(X_test, y_test.values))

    dev_Memory = abs(y_pred - y_test.values)
    mean_dev = np.mean(dev_Memory)
    mse_Memory = np.sqrt(np.sum(dev_Memory**2) / dev_Memory.size)  # this is RMSE, despite the 'mse' name
    mape = np.mean(dev_Memory / y_test.values)
    max_pe = np.max(dev_Memory)
    max_ne = np.max(np.negative(dev_Memory))
    new_data1 = pd.DataFrame(y_pred)
    new_data2 = pd.DataFrame(y_test.values)
    new_data = pd.merge(new_data1,
                        new_data2,
                        left_index=True,
                        right_index=True)

    filename12 = r'C:\Users\epatdeb\AlphaCANDI\SBG_Rawinput_1.6\latest\Logs\AlphaCandi17_MlxEnsmbl_Memory.log'
    logging.basicConfig(filename=filename12, level=logging.DEBUG)
    logging.info(
        "tensor_bp sbg_mlxtend_ensamble iter:%s \n \n y_pred/y_test: \n %s \n mae:%s mse:%s mape:%s max_pe:%s max_ne:%s",
        iterate, new_data, mean_dev, mse_Memory, mape, max_pe, max_ne)
    logging.shutdown()

    return mean_squared_error(y_test, y_pred), mean_dev, mape
def test_weight_unsupported_regressor():
    # including regressor that does not support
    # sample_weight should raise error
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    knn = KNeighborsRegressor()
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, knn],
                               meta_regressor=svr_rbf)

    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
Example 16
def test_weight_unsupported_regressor():
    # including regressor that does not support
    # sample_weight should raise error
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                               meta_regressor=svr_rbf)

    with pytest.raises(TypeError):
        stregr.fit(X1, y, sample_weight=w).predict(X1)
Example 17
def test_weight_unsupported_with_no_weight():
    # pass no weight to regressors with no weight support
    # should not be a problem
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                               meta_regressor=svr_rbf)
    stregr.fit(X1, y).predict(X1)

    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=lasso)
    stregr.fit(X1, y).predict(X1)
def test_weight_unsupported_with_no_weight():
    # pass no weight to regressors with no weight support
    # should not be a problem
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    knn = KNeighborsRegressor()
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, knn],
                               meta_regressor=svr_rbf)
    stregr.fit(X1, y).predict(X1)

    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=knn)
    stregr.fit(X1, y).predict(X1)
Example 19
def test_weight_unsupported_with_no_weight():
    # pass no weight to regressors with no weight support
    # should not be a problem
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    lasso = Lasso(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                               meta_regressor=svr_rbf)
    stregr.fit(X1, y).predict(X1)

    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=lasso)
    stregr.fit(X1, y).predict(X1)
Example 20
def test_predictions_from_sparse_matrix():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr], meta_regressor=ridge)

    # dense
    stregr.fit(X1, y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61

    # sparse
    stregr.fit(sparse.csr_matrix(X1), y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61
Example 21
def test_sample_weight():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    pred1 = stregr.fit(X1, y, sample_weight=w).predict(X1)
    mse = 0.22
    got = np.mean((stregr.predict(X1) - y)**2)
    assert round(got, 2) == mse
    # make sure that this is not equivalent to the model with no weight
    pred2 = stregr.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
Example 22
def Gbc():
    from sklearn.ensemble import GradientBoostingClassifier, AdaBoostRegressor
    from sklearn.linear_model import LogisticRegression
    from mlxtend.regressor import StackingRegressor
    from sklearn.svm import SVR
    adaboost = AdaBoostRegressor()
    lr = LogisticRegression()  # was the bare class; instantiated here, though unused below
    gb = GradientBoostingClassifier()  # note: a classifier mixed into a regression stack
    svr = SVR(kernel='linear')
    svr_rbf = SVR(kernel='rbf')
    regressors = [svr, adaboost, gb]
    stregr = StackingRegressor(regressors=regressors, meta_regressor=svr_rbf)
    stregr.fit(X_train, y_train)
    outpred = stregr.predict(X_valid)
    evaluate_strategy(outpred)
Example 23
def train_model(X_train, y_train):
    clf1 = LinearSVR()
    clf2 = LinearRegression()
    clf3 = Ridge()
    clf4 = LGBMRegressor()

    svr_linear = LinearSVR()
    sr = StackingRegressor(regressors=[clf1, clf2, clf3, clf4],
                           meta_regressor=svr_linear)

    sr.fit(X_train, y_train)
    result = sr.predict(X_train)
    score = get_rmse_score(result, y_train)
    print("RMSE Score train: %.4f" % score)
    return sr
Example 24
def test_sample_weight():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    pred1 = stregr.fit(X1, y, sample_weight=w).predict(X1)
    mse = 0.22
    got = np.mean((stregr.predict(X1) - y) ** 2)
    assert round(got, 2) == mse
    # make sure that this is not equivalent to the model with no weight
    pred2 = stregr.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
Example 25
def test_get_coeff_fail():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[ridge, lr], meta_regressor=svr_rbf)
    # an RBF-kernel meta-regressor exposes no coef_, so the lookup must fail
    with pytest.raises(AttributeError):
        stregr = stregr.fit(X1, y)
        got = stregr.coef_
Example 26
def test_predictions_from_sparse_matrix():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr],
                               meta_regressor=ridge)

    # dense
    stregr.fit(X1, y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61

    # sparse
    stregr.fit(sparse.csr_matrix(X1), y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61
def test_get_coeff_fail():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[ridge, lr],
                               meta_regressor=svr_rbf)
    # same check as above: coef_ is undefined for an RBF SVR meta-regressor
    with pytest.raises(AttributeError):
        stregr = stregr.fit(X1, y)
        got = stregr.coef_
def test_get_coeff_fail():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[ridge, lr], meta_regressor=svr_rbf)

    with pytest.raises(AttributeError):
        stregr = stregr.fit(X1, y)
        r = stregr.coef_
        assert r
Example 29
    def stackModel(self):
        train_X = self.X.values  # .as_matrix() was removed in pandas 1.0
        train_Y = self.Y.values

        test_X = self.Test.values

        # train_X = data_scaler(train_X)

        X_train, X_test, y_train, y_test = train_test_split(train_X, train_Y, test_size=0.2, random_state=1)

        gbdt = GradientBoostingRegressor(loss='ls', alpha=0.9,
                                         n_estimators=500,
                                         learning_rate=0.05,
                                         max_depth=8,
                                         subsample=0.8,
                                         min_samples_split=9,
                                         max_leaf_nodes=10)
        xgb = XGBRegressor(max_depth=5, n_estimators=500, learning_rate=0.05, silent=False)
        lr = LinearRegression()
        rfg = RandomForestRegressor(bootstrap=False, max_features=0.05, min_samples_leaf=11, min_samples_split=8,
                                    n_estimators=100)
        svr_rbf = SVR(kernel='rbf')

        stregr = StackingRegressor(regressors=[gbdt, xgb, lr, rfg], meta_regressor=svr_rbf)

        stregr.fit(X_train, y_train)
        stregr.predict(X_train)

        # Evaluate and visualize the fit

        print("Mean Squared Error: %.6f" % np.mean((stregr.predict(X_train) - y_train) ** 2) ** 0.5)
        error(stregr.predict(X_test), y_test)

        # online
        result = stregr.predict(test_X)
        save_to_file(result, self.uid, "../result/result_12.09_2_stacking.csv")

        with plt.style.context(('seaborn-whitegrid')):
            plt.scatter(X_train, y_train, c='lightgray')
            plt.plot(X_train, stregr.predict(X_train), c='darkgreen', lw=2)

        plt.show()
Example 30
def test_different_models():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
    y_pred = stregr.fit(X1, y).predict(X1)
    mse = 0.214
    got = np.mean((stregr.predict(X1) - y)**2)
    assert round(got, 3) == mse
Example 31
def test_get_coeff_fail():
    lr = LinearRegression()
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[ridge, lr],
                               meta_regressor=svr_rbf)

    with pytest.raises(AttributeError):
        stregr = stregr.fit(X1, y)
        r = stregr.coef_
        assert r
Example 32
def test_sparse_matrix_inputs_and_features_in_secondary():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear')
    rf = RandomForestRegressor(random_state=2)
    ridge = Ridge(random_state=0)
    svr_rbf = SVR(kernel='rbf')
    stack = StackingRegressor(regressors=[svr_lin, lr, ridge, rf],
                              meta_regressor=svr_rbf,
                              use_features_in_secondary=True)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.14
    got = np.mean((stack.predict(X1) - y)**2)
    assert round(got, 2) == mse

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)
    mse = 0.14
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y)**2)
    assert round(got, 2) == mse
Example 33
    def train(self, X, y):
        features = X
        labels = y

        #test train split
        X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25,
                                                            random_state=4)

        #Ridge
        regcv = linear_model.RidgeCV(
            alphas=[0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75])
        regcv.fit(features, labels)
        regcv.alpha_
        reg = linear_model.Ridge(alpha=regcv.alpha_)
        reg.fit(features, labels)

        # GB
        params = {
            'n_estimators': 100,
            'max_depth': 5,
            'min_samples_split': 2,
            'learning_rate': 0.1,
            'loss': 'ls'
        }
        gbr = ensemble.GradientBoostingRegressor(**params)
        gbr.fit(features, labels)

        #blended model
        meta = linear_model.LinearRegression()
        blender = StackingRegressor(regressors=[reg, gbr], meta_regressor=meta)
        _ = blender.fit(features, labels)
        y_pred = blender.predict(X_test)

        print "***** TRAINING STATS ********"
        scores = cross_val_score(blender, features, labels, cv=10)
        print("Accuracy: %0.2f (+/- %0.2f)" %
              (scores.mean(), scores.std() * 2))

        mean_diff = np.mean(np.abs(np.exp(Y_test) - np.exp(y_pred)))
        p_mean_diff = np.mean(mean_diff / np.exp(Y_test))
        print "Mean Error:\t %.0f/%0.3f%%" % (mean_diff, p_mean_diff * 100)
        print "***** TRAINING STATS ********"

        return blender
Example 35
scaler=StandardScaler()
scaler.fit(train_x)
#train_x=scaler.transform(train_x)

#LGB.fit(train_x,y_train)
rid=KNeighborsRegressor(n_jobs=3, n_neighbors=4)  # note: 'rid' is a KNN, not a Ridge model
rf=LinearRegression()  # note: 'rf' is linear regression, not a random forest
# renamed from 'str' to avoid shadowing the built-in str()
stack=StackingRegressor(regressors=[LGB,rid],verbose=1,meta_regressor=rf)
print('Overall RMSE')
cv=cross_validate(stack,train_x,y_train,scoring=('neg_mean_squared_error'),return_train_score=False,cv=10)
print(np.sqrt(np.abs(np.mean(cv['test_score']))))

#Grabbing Feature Importance#
#print('grabbing feature importance')
#LGB.fit(train_x,y_train)
#feature_df=pd.DataFrame({'Cols':train_x.columns,'Vals':LGB.feature_importances_})
#feature_df=feature_df.sort_values(['Vals'],ascending=[0])



#Use when Submitting Below#
'''
test_x=temp.tail(1459)
test_x=scaler.transform(test_x)
stack.fit(train_x,y_train)
preds=np.expm1(stack.predict(test_x))
id_array = list(range(1461,2920))
submission_frame=pd.DataFrame({'id':id_array,'SalePrice':preds})
submission_frame=submission_frame[['id','SalePrice']]
submission_frame.to_csv('out.csv',index=False)
'''
Example 36
#==============================================================================
#   4) LGBMRegressor model
#==============================================================================
#    from lightgbm import LGBMRegressor
#    
#    model_lgb = LGBMRegressor()
#==============================================================================
#     5) Stacking ensemble model
#==============================================================================
    from mlxtend.regressor import StackingRegressor    
#
    regressors = [model_xgb,model_rfg,model_gb]
    # note: model_xgb serves as both a base regressor and the meta-regressor here
    model = StackingRegressor(regressors=regressors, meta_regressor=model_xgb)

#    model = model_gb
    model.fit(train_text,train_labels)
    
#    print('The parameters of the best model are: ')
#    print(model.best_params_)
  
    preds = model.predict(train_text)
    print('The pearsonr of training set is {}'.format(pearsonr(list(train_labels), list(preds))[0]))
    print('The MSE of training set is {}'.format(mean_squared_error(list(train_labels), list(preds))))

    #==============================================================================
    # Predict on the test set
    #==============================================================================
    preds = model.predict(test_text)

    print('The pearsonr of test set is {}'.format(pearsonr(list(test_labels), list(preds))[0]))
    print('The MSE of test set is {}'.format(mean_squared_error(list(test_labels), list(preds))))
Example 37
def main():
    """
    load data
    """
    train_set = pd.read_csv('../data/train.csv')
    test_set = pd.read_csv('../data/test.csv')
    """
    Remove Outliers
    """
    outliers = train_set[(train_set['GrLivArea'] > 4000)
                         & (train_set['SalePrice'] < 300000)].index
    train_set.drop(outliers, inplace=True)
    """
    fix salePrice skewness
    """
    train_set["SalePrice"] = np.log1p(train_set["SalePrice"])
    y_train_values = train_set["SalePrice"].values
    """
    prepare combined data.
    """
    train_set_id = train_set['Id']
    test_set_id = test_set['Id']

    train_set_rows = train_set.shape[0]
    test_set_rows = test_set.shape[0]

    train_set.drop('Id', axis=1, inplace=True)
    test_set.drop('Id', axis=1, inplace=True)
    train_set.drop('SalePrice', axis=1, inplace=True)

    combined_data = pd.concat((train_set, test_set))
    """
    create data transform pipeline
    """
    transform_pipeline = Pipeline(steps=[
        ('NaNFixer', NaNFixer()),
        ('SkewFixer', SkewFixer()),
        ('Scaler', Scaler()),
        ('FeatureDropper', FeatureDropper()),
        ('Dummyfier', Dummyfier()),
        #('TrainDataSeparator', TrainDataSeparator(train_set_rows=train_set_rows)),
    ])

    transformed_data = transform_pipeline.transform(combined_data)
    train_data = transformed_data[:train_set_rows]
    predict_data = transformed_data[train_set_rows:]
    """
    try various regressors
    """

    rf_param = {'n_estimators': [10, 12], 'max_depth': [3], 'n_jobs': [-1]}

    ls_param = {
        'alpha': [0.0001, 0.0002, 0.0003, 0.0004, 0.0005],
        'max_iter': [10000],
        "normalize": [True, False]
    }

    elnet_param = {
        'alpha': [0.0008, 0.004, 0.005],
        'l1_ratio': [0.08, 0.1, 0.3],
        'max_iter': [10000]
    }

    ridge_param = {'alpha': [35, 40, 45, 50, 55, 60, 65, 70, 80, 90]}

    # gbm_param = {"n_estimators": [1000],
    #              'min_child_weight': [1, 5, 10],
    #              'gamma': [0.1, 0.5, 1, 1.5, 2, 5],
    #              'subsample': [0.6, 0.8, 1.0],
    #              'colsample_bytree': [0.6, 0.8, 1.0],
    #              'max_depth': [3, 4, 5],
    #              'eta': [0.01],
    #              'eval_metric': ['mae']
    #              }
    #

    gbm_param = {"n_estimators": [1000]}

    lgb_params = {
        'objective': ['regression'],
        'num_leaves': [255],
        'max_depth': [8],
        'bagging_seed': [3],
        'boosting_type': ['gbdt']
        # ,
        # 'min_sum_hessian_in_leaf' : [100],
        # 'learning_rate': np.linspace(0.05, 0.1, 3),
        # 'bagging_fraction': np.linspace(0.7, 0.9, 3),
        # 'bagging_freq': np.linspace(30, 50, 3, dtype='int'),
        # 'max_bin': [15, 63, 255],
    }

    # grid(SVR()).grid_get(X_scaled,y_log,{'C':[11,13,15],'kernel':["rbf"],"gamma":[0.0003,0.0004],"epsilon":[0.008,0.009]})
    # param_grid={'alpha':[0.2,0.3,0.4], 'kernel':["polynomial"], 'degree':[3],'coef0':[0.8,1]}
    # grid(KernelRidge()).grid_get(X_scaled,y_log,param_grid)

    rf = get_best_estimator(train_data,
                            y_train_values,
                            estimator=RandomForestRegressor(),
                            params=rf_param)
    elnet = get_best_estimator(train_data,
                               y_train_values,
                               estimator=ElasticNet(),
                               params=elnet_param)

    lso = get_best_estimator(train_data,
                             y_train_values,
                             estimator=Lasso(),
                             params=ls_param)
    rdg = get_best_estimator(train_data,
                             y_train_values,
                             estimator=Ridge(),
                             params=ridge_param)

    gbm = get_best_estimator(train_data,
                             y_train_values,
                             estimator=xgb.XGBRegressor(),
                             params=gbm_param)
    lbm = get_best_estimator(train_data,
                             y_train_values,
                             estimator=lgb.LGBMRegressor(),
                             params=lgb_params)

    model = StackingRegressor(regressors=[rf, elnet, lso, rdg, gbm, lbm],
                              meta_regressor=Lasso(alpha=0.0005))

    # Fit the model on our data
    model.fit(train_data, y_train_values)

    y_pred = model.predict(train_data)
    print(sqrt(mean_squared_error(y_train_values, y_pred)))

    # Predict test set
    ensembled = np.expm1(model.predict(predict_data))
    """
    export submission data
    """
    submission = pd.DataFrame({"Id": test_set_id, "SalePrice": ensembled})
    submission.to_csv('submission.csv', index=False)
    """" Ensemble Weights """
    from scipy.optimize import minimize
    regressors = [rf, elnet, lso, rdg, gbm, lbm]

    predictions = []
    for clf in regressors:
        predictions.append(
            clf.predict(train_data))  # listing all our predictions

    def mse_func(weights):
        # scipy minimize will pass the weights as a numpy array
        final_prediction = 0
        for weight, prediction in zip(weights, predictions):
            final_prediction += weight * prediction
        return mean_squared_error(y_train_values, final_prediction)

    starting_values = [0.5] * len(
        predictions)  # minimize needs a starting value
    bounds = [(0, 1)] * len(predictions)  # weights are bound between 0 and 1
    res = minimize(mse_func, starting_values, bounds=bounds, method='SLSQP')
    print('Result Assessment: {message_algo}'.format(
        message_algo=res['message']))
    print('Ensemble Score: {best_score}'.format(best_score=res['fun']))
    print('Best Weights: {weights}'.format(weights=res['x']))
Example 38
def main():
    print("Reading in Data")
    # data for the final prediction
    train = pd.read_csv('cleaned_train20180129_111517.csv')
    test = pd.read_csv('cleaned_test20180129_111517.csv')

    # data for validating the A-leaderboard results
    #train = pd.read_csv('cleaned_train20180129_102513.csv')
    #test = pd.read_csv('cleaned_test20180129_102513.csv')

    test = test.drop(['id'], axis=1)
    train = train.drop(['id'], axis=1)
    y_train = train['血糖']

    # pred_proba: per-sample blood-glucose weights for the test set
    threshold = 6.5
    test_num = len(test)
    train_num = len(train)
    bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat, X_train, pred_proba = fuck_columns(
        train, test, threshold)

    print("linear model 开始训练")
    pred_bigger, pred_less, linear_bigger, linear_less = linear_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    linear_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("lasso model 开始训练")
    pred_bigger, pred_less, lasso_bigger, lasso_less = lasso_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    lasso_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("ENet model 开始训练")
    pred_bigger, pred_less, ENet_bigger, ENet_less = ENet_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    ENet_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("集成模型开始训练...")
    print("RandomForestRegressor...")
    pred_bigger, pred_less, rf_bigger, rf_less = rf_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    rf_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("GradientBoostingRegressor...")
    pred_bigger, pred_less, gb_bigger, gb_less = GBoost_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    gb_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("LGBMRegressor...")
    pred_bigger, pred_less, lgb_bigger, lgb_less = LGBM_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    lgb_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    print("XGBRegressor...")
    pred_bigger, pred_less, xgb_bigger, xgb_less = xgb_model(
        bigger_thr_X, bigger_thr_y, less_thr_X, less_thr_y, test_concat)
    # combine predictions with the weights
    xgb_pred_res = np.array([
        pred_less[i] * pred_proba[i][0] + pred_bigger[i] * pred_proba[i][1]
        for i in range(test_num)
    ])
    '''
    Stacking Learning
    '''
    print("StackingRegressor...")
    stacked_averaged_bigger_models = StackingRegressor(
        regressors=[linear_bigger, lasso_bigger, ENet_bigger],
        meta_regressor=gb_bigger)
    stacked_averaged_less_models = StackingRegressor(
        regressors=[linear_less, lasso_less, ENet_less],
        meta_regressor=gb_less)
    # fit the stacking models
    stacked_averaged_bigger_models.fit(bigger_thr_X, bigger_thr_y)
    stacked_averaged_less_models.fit(less_thr_X, less_thr_y)
    # predict on the test set
    stacked_bigger_pred = stacked_averaged_bigger_models.predict(test_concat)
    stacked_less_pred = stacked_averaged_less_models.predict(test_concat)
    # combine predictions with the weights
    stacked_pred_res = np.array([
        stacked_less_pred[i] * pred_proba[i][0] +
        stacked_bigger_pred[i] * pred_proba[i][1] for i in range(test_num)
    ])

    ensemble = stacked_pred_res * 0.40 + xgb_pred_res * 0.40 + lgb_pred_res * 0.20
    # blend the stacking result with the linear model
    new_ensemble = np.array([
        linear_pred_res[i] * pred_proba[i][0] + ensemble[i] * pred_proba[i][1]
        for i in range(test_num)
    ])

    sub = pd.DataFrame({'pred': ensemble})
    sub_wig = pd.DataFrame({'pred': new_ensemble})
    sub.to_csv('submission_b.csv', header=None, index=False)
    sub_wig.to_csv('submission_b_wig.csv', header=None, index=False)
Example 39
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingRegressor

# assumed setup (as in the mlxtend docs): 40 one-feature samples on a sine curve
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - np.random.rand(8))

# Initializing models

lr = LinearRegression()
svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
svr_rbf = SVR(kernel='rbf')

stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                           meta_regressor=svr_rbf)

# Training the stacking regressor

stregr.fit(X, y)
stregr.predict(X)

# Evaluate and visualize the fit

print("Mean Squared Error: %.4f" % np.mean((stregr.predict(X) - y)**2))
print('Variance Score: %.4f' % stregr.score(X, y))

with plt.style.context(('seaborn-whitegrid')):
    plt.scatter(X, y, c='lightgray')
    plt.plot(X, stregr.predict(X), c='darkgreen', lw=2)

plt.show()

# Example 2 - Stacked Regression and GridSearch
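The trailing comment announces a follow-up docs example on grid search. Because StackingRegressor exposes its base estimators as named nested params (the lowercased class names and exact param set are assumptions here), a sketch of that section might look like:

from sklearn.model_selection import GridSearchCV

# Tune base regressors and the meta-regressor through nested params.
params = {'ridge__alpha': [0.1, 1.0, 10.0],
          'svr__C': [0.1, 1.0, 10.0],
          'meta_regressor__C': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr, param_grid=params, cv=5, refit=True)
grid.fit(X, y)
print("Best: %f using %s" % (grid.best_score_, grid.best_params_))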
Example 40
K.set_session(sess)
np.random.seed(7)
rn.seed(7)

from mlxtend.regressor import StackingRegressor
rf = RandomForestRegressor(n_estimators=54, max_depth=None, random_state=8)
ext = ExtraTreesRegressor(n_estimators=584,
                          min_samples_split=2,
                          random_state=8)


def create_model():
    model = Sequential()
    model.add(Dense(540, input_dim=8, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model


nn = KerasRegressor(build_fn=create_model, epochs=32, batch_size=32, verbose=0)
clf = StackingRegressor(regressors=[nn, ext], meta_regressor=rf)

scores = []
for train, test in kfold.split(X, y):
    clf.fit(X[train], y[train])
    score = clf.score(X[test], y[test])
    print(score)
    scores.append(score)
print("%.3f%% (+/- %.3f)" % (np.mean(scores), np.std(scores)))
Example 41
def predict():
    '''
    For rendering results on HTML GUI
    '''
    features = [x for x in request.form.values()]
    #final_features = [np.array(int_features)]
    #prediction = model.predict(final_features)

    #output = round(prediction[0], 2)

    features = np.array(features)
    features = features.reshape(1, 6)
    features = pd.DataFrame(data=features,
                            columns=[
                                'Name', 'Genre', 'Comments', 'Likes',
                                'Popularity', 'Followers'
                            ])
    df = pd.read_csv('data.csv')
    cv = {'Comments': int, 'Likes': int, 'Popularity': int, 'Followers': int}
    df = df.astype(cv)
    features = features.astype(cv)
    #x=df[df['Views']==0].index

    df.drop(index=df[df['Views'] < df['Likes']].index, axis=1, inplace=True)
    df.drop(index=df[df['Views'] < df['Comments']].index, axis=1, inplace=True)
    df.drop(index=df[df['Views'] < df['Popularity']].index,
            axis=1,
            inplace=True)

    Q1 = df.quantile(0.25)
    Q3 = df.quantile(0.75)
    IQR = Q3 - Q1
    (df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))
    df = df[~((df < (Q1 - 3 * IQR)) | (df > (Q3 + 3 * IQR))).any(axis=1)]

    df = df.drop(
        columns=['Unique_ID', 'Country', 'Song_Name', 'Timestamp', 'index'])

    y = df['Views']
    df = df.drop(columns=['Views'])

    be = BinaryEncoder()
    df = be.fit_transform(df)
    f = be.transform(features)

    X = df.iloc[:, :]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=0)

    rg1 = AdaBoostRegressor()
    rg1.fit(X_train, y_train)
    #ypred=rg1.predict(X_test)
    #sqrt(mean_squared_error(y_test,ypred))

    rg2 = GradientBoostingRegressor(n_estimators=300, learning_rate=0.1)
    # para={'n_estimators':[250,300],'learning_rate':[1,0.1,0.01]}
    # grid=GridSearchCV(estimator=rg8,param_grid=para,verbose=1,cv=10,n_jobs=-1)
    rg2.fit(X_train, y_train)
    #ypred=rg2.predict(X_test)
    #sqrt(mean_squared_error(y_test,ypred))

    rg3 = RandomForestRegressor(random_state=0, n_estimators=20, max_depth=15)
    # para={'n_estimators':[5,10,30,20],'max_depth':[5,8,20,17]}
    # grid=GridSearchCV(estimator=rg9,param_grid=para,cv=10,verbose=1,n_jobs=-1)
    rg3.fit(X_train, y_train)
    #ypred=rg3.predict(X_test)
    #sqrt(mean_squared_error(y_test,ypred))

    rg6 = StackingRegressor([rg1, rg2], meta_regressor=rg3)
    rg6.fit(X_train, y_train)
    #ypred=rg6.predict(X_test)
    #sqrt(mean_squared_error(y_test,ypred))
    f = f.iloc[:, :]
    y_pred = rg6.predict(f)

    y_pred = y_pred.astype(int)

    return render_template(
        'index.html', prediction_text='Number of Views is {}'.format(y_pred))
Example 42
    def stacklearning(self):
        class extAll(BaseEstimator, TransformerMixin):
            def __init__(self):
                pass

            def fit(self, X, y=None):
                return self

            def transform(self, X):
                return X  # passthrough; originally returned self, which breaks pipelines

            def predict(self, X):
                return self

        class extMorgan(BaseEstimator, TransformerMixin):
            def __init__(self):
                pass

            def fit(self, X, y=None):
                return self

            def transform(self, X):
                _,morgan,_=sepTables(X)
                return morgan
        class extMACCS(BaseEstimator, TransformerMixin):
            def __init__(self):
                pass

            def fit(self, X, y=None):
                return self

            def transform(self, X):
                maccs,morgan,_=sepTables(X)
                maccs = pd.concat([morgan,maccs],axis=1)

                return maccs

        class extDescriptor(BaseEstimator, TransformerMixin):
            def __init__(self):
                pass

            def fit(self, X, y=None):
                return self

            def transform(self, X):
                maccs,morgan,descriptor=sepTables(X)
                descriptor = pd.concat([morgan,descriptor],axis=1)
                descriptor = pd.concat([maccs,descriptor],axis=1)
                return descriptor

        class extPCA(BaseEstimator, TransformerMixin):
            def __init__(self):
                pass

            def fit(self, X, y=None):
                return self

            def transform(self, X):
                model = PCA(n_components=64)
                _,morgan,_=sepTables(X)
                morgan = morgan.reset_index().drop('index', axis=1)
                W = pd.DataFrame(model.fit_transform(X))
                W = pd.concat([morgan,W],axis=1)
                return W

        lgbm = LGBMRegressor(boosting_type='gbdt', num_leaves= 60,learning_rate=0.06)
        rgf = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)
        rgf1 = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)
        rgf2 = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)
        rgf3 = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)
        rgf4 = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)

        pipe1 = make_pipeline(extMACCS(), rgf)
        pipe2 = make_pipeline(extMorgan(), rgf1)
        pipe3 = make_pipeline(extDescriptor(), rgf2)
        pipe4 = make_pipeline(extPCA(), rgf3)
        pipe7 =make_pipeline(extDescriptor(), rgf4)
        pipe8 =make_pipeline(extDescriptor(), rgf4)

        xgb = xgboost.XGBRegressor()
        nbrs = KNeighborsRegressor(2)
        svr = SVR(gamma='auto',kernel='linear')
        sgd = SGDRegressor(max_iter=1000)
        pls = PLSRegression(n_components=3)
        ext = ExtraTreesRegressor(n_estimators=30,max_features= 20,min_samples_split= 5,max_depth= 50, min_samples_leaf= 5)

        pipe5 = make_pipeline(extMorgan(), nbrs)
        pipe6 = make_pipeline(extMACCS(), rgf)
        alldata = make_pipeline(extAll())

        meta = RandomForestRegressor(max_depth=20, random_state=0, n_estimators=400)

        stack1 = StackingRegressor(regressors=[pipe1, pipe2, pipe3], meta_regressor=rgf, verbose=1)
        #stack2 = StackingRegressor(regressors=[stack1,nbrs, svr,pls,rgf], meta_regressor=lgbm, verbose=1)
        stack2 = StackingRegressor(regressors=[stack1,pipe5,pipe7,pipe1], meta_regressor=rgf,verbose=1)

        scores = cross_val_score(stack2, X, y, cv=10)
        print("R^2 Score: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), 'stacking'))
        stack1_score = cross_val_score(stack1,X,y, cv=10)
        rgf_score = cross_val_score(rgf,X,y,cv=10)

        stack2.fit(X_train, y_train)
        y_pred = stack2.predict(X_train)
        y_val = stack2.predict(X_test)
        print("Root Mean Squared Error train: %.4f" % calcRMSE(y_pred, y_train))
        print("Root Mean Squared Error test: %.4f" % calcRMSE(y_val, y_test))
        print('Correlation Coefficient train: %.4f' % calcCorr(y_pred, y_train))
        print('Correlation Coefficient test: %.4f' % calcCorr(y_val, y_test))

        rgf.fit(X_train, y_train)
        y_pred = rgf.predict(X_train)
        y_val = rgf.predict(X_test)
        print("Root Mean Squared Error train: %.4f" % calcRMSE(y_pred, y_train))
        print("Root Mean Squared Error test: %.4f" % calcRMSE(y_val, y_test))
        print('Correlation Coefficient train: %.4f' % calcCorr(y_pred, y_train))
        print('Correlation Coefficient test: %.4f' % calcCorr(y_val, y_test))

        pipe1.fit(X_train, y_train)
        y_pred = pipe1.predict(X_train)
        y_val = pipe1.predict(X_test)
        print("Root Mean Squared Error train: %.4f" % calcRMSE(y_pred, y_train))
        print("Root Mean Squared Error test: %.4f" % calcRMSE(y_val, y_test))
        print('Correlation Coefficient train: %.4f' % calcCorr(y_pred, y_train))
        print('Correlation Coefficient test: %.4f' % calcCorr(y_val, y_test))


        cols = np.arange(1,550,1).tolist()
        cols = X.columns.tolist()
        cols = [1,2,3]
        # Initializing Classifiers
        reg1 = Ridge(random_state=1)
        #reg2 = ExtraTreesRegressor()
        reg2 = ExtraTreesRegressor(n_estimators=50,max_features= 50,min_samples_split= 5,max_depth= 50, min_samples_leaf= 5)
        reg3 = SVR(gamma='auto',kernel='linear')
        reg4 = LGBMRegressor(boosting_type='gbdt', num_leaves= 60,learning_rate=0.06)
        pls = PLSRegression(n_components=3)
        pipe1 = make_pipeline(ColumnSelector(cols=cols), ExtraTreesRegressor(n_estimators=50))
        #linear =SGDRegressor(max_iter=1000)
        rgf = RGFRegressor(max_leaf=1000, algorithm="RGF",test_interval=100, loss="LS",verbose=False,l2=1.0)
        nbrs = KNeighborsRegressor(2)
        pipe2 = make_pipeline(ColumnSelector(cols=cols), KNeighborsRegressor(31))

        meta = ExtraTreesRegressor(n_estimators=50,max_features= 7,min_samples_split= 5,max_depth= 50, min_samples_leaf= 5)

        stackReg = StackingRegressor(regressors=[reg1,reg2, reg3,pipe1,pls,nbrs,rgf], meta_regressor=meta,verbose=1)
        stackReg.fit(X_train, y_train)
        y_pred = stackReg.predict(X_train)
        y_val = stackReg.predict(X_test)
        print("Root Mean Squared Error train: %.4f" % calcRMSE(y_pred,y_train))
        print("Root Mean Squared Error test: %.4f" % calcRMSE(y_val,y_test))
        print('Correlation Coefficient train: %.4f' % calcCorr(y_pred,y_train))
        print('Correlation Coefficient test: %.4f' % calcCorr(y_val,y_test))

        reg4.fit(X_train, y_train)  # was rgf.fit(...), but the predictions below use reg4
        y_pred = reg4.predict(X_train)
        y_val = reg4.predict(X_test)
        print("Root Mean Squared Error train: %.4f" % calcRMSE(y_pred,y_train))
        print("Root Mean Squared Error test: %.4f" % calcRMSE(y_val,y_test))
        print('Correlation Coefficient train: %.4f' % calcCorr(y_pred,y_train))
        print('Correlation Coefficient test: %.4f' % calcCorr(y_val,y_test))
Example 43
model_rf = RandomForestRegressor(n_estimators=200,
                                 max_features=0.26326530612244903,
                                 criterion='mse')
model_extra_tree = ExtraTreesRegressor(n_estimators=200, criterion='mse')
model_gb = GradientBoostingRegressor(n_estimators=100,
                                     max_depth=5,
                                     random_state=43)
model_lr = LinearRegression()
svr_rbf = SVR(kernel='rbf')
svr_lin = SVR(kernel='linear')
ridge = Ridge()
model_xgb2 = XGBRegressor(max_depth=10, n_estimators=100)
model_vote = VotingClassifier(
    estimators=[('xgb', model_xgb), ('rf', model_rf), ('gb', model_gb)])
sclf = StackingRegressor(regressors=[model_extra_tree, model_xgb2, model_rf],
                         meta_regressor=model_lr)

time_split = TimeSeriesSplit(n_splits=5)
# .as_matrix() was removed in pandas 1.0; .values is the modern equivalent
print(cross_val_score(sclf,
                      X=train.values,
                      y=target.values,
                      scoring=SMAPE,
                      cv=time_split).mean())

sclf.fit(X=train, y=target)
preds = sclf.predict(test)
sample_submission['y'] = preds
print(sample_submission[sample_submission['y'] < 0])
sample_submission['y'] = sample_submission['y'].map(lambda x: x
                                                    if x > 0 else 0.0)
sample_submission.to_csv("my_submission_24_2.tsv", sep=',', index=False)
Example 44
                  xgbtrain,
                  num_boost_round=2889,
                  early_stopping_rounds=50,
                  evals=watchlist)

rfreg = RandomForestRegressor(random_state=1, max_depth=15)
ridge_reg = Ridge(normalize=True)
lasso_reg = Lasso()
linear_reg = LinearRegression(normalize=True)
stacking_reg = StackingRegressor(regressors=[rfreg, ridge_reg, lasso_reg],
                                 meta_regressor=linear_reg)

feature = [x for x in train_zero_var.columns if x not in ['Value']]
# X_train, X_test, y_train, y_test = train_test_split(train_zero_var[feature], train_zero_var['Value'], test_size=0.2,
#                                                     random_state=0)
stacking_reg.fit(X_train, y_train)
stacking_test = pd.DataFrame(stacking_reg.predict(X_test))
stacking_test.columns = ['stacking_pred']
y_test = pd.DataFrame(y_test)
y_test.columns = ['Value']
mean_squared_error(stacking_test['stacking_pred'], y_test['Value'])

train_zero_var = train_zero_var.reset_index()

# predict for Random Forest
rf_pred = pd.DataFrame()
for idx in range(0, 5):
    train = train_zero_var[train_zero_var['index'] % 5 != idx]
    test = train_zero_var[train_zero_var['index'] % 5 == idx]
    stacking_feature = [
        x for x in train.columns if x not in ['index', 'Value']
Example 45
def useXYtrain(x, y, times):
    flag = 0
    for i in range(0, len(Selected_learnerCode)):
        if Selected_learnerCode[i] != '':
            flag += 1
    if flag == 0:
        print('No proper learner\n')
        return
    stacking_MSE = [[], [], [], [], [], []]
    MSE = [[], [], [], [], [], [], []]
    R_square = [[], [], [], [], [], [], [], []]

    Ada_MSE = []
    Ada_r_square = []

    for i in range(0, times):
        print('Trial ' + str(i + 1) + ':\n')
        Learners_map = {}
        Learners = []
        X_train, X_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.20)
        svr = SVR(C=1.0, epsilon=0.2)
        parameters = {
            'C': np.logspace(-3, 3, 7),
            'gamma': np.logspace(-3, 3, 7)
        }
        print("GridSearch starting...")
        clfsvr = GridSearchCV(svr,
                              parameters,
                              n_jobs=-1,
                              scoring='neg_mean_squared_error')
        clfsvr.fit(X_train, y_train)

        print('The parameters of the best model are: ')
        print(clfsvr.best_params_)
        y_pred = clfsvr.best_estimator_.predict(X_test)
        # drawTrain(y_pred, y_test, 'SVR', i)
        # SVR_MSE.append(mean_squared_error(y_test, y_pred))

        yy = clfsvr.best_estimator_.predict(x)
        R_square[0].append(drawTrain(y, yy, 'SVR', i))
        MSE[0].append(mean_squared_error(y_test, y_pred))

        if 'SVR' in Selected_learnerCode:
            print('SVR Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            Learners.append(clfsvr.best_estimator_)

        Learners_map['SVR'] = svr
        """ann = Regressor(layers = [Layer("Sigmoid", units=14),
                                   Layer("Linear")],
                         learning_rate = 0.02,
                         random_state = 2018,
                         n_iter = 10)

        ann.fit(X_train,y_train)
        y_pred = ann.predict(X_test)
        print('ANN Mean squared error is ' + str(mean_squared_error(y_test, y_pred)) + "\n")"""

        parameters = {'n_estimators': [10, 50, 100, 200, 300, 400, 500, 1000]}
        rfr = RandomForestRegressor(n_estimators=200, random_state=0)
        # drawTrain(rfr, x, y, 'RFR', i)
        # rfr = RandomForestRegressor(n_estimators=200, random_state=0)
        clfrfr = GridSearchCV(rfr,
                              parameters,
                              n_jobs=-1,
                              scoring='neg_mean_squared_error')
        clfrfr.fit(X_train, y_train)
        print('The parameters of the best model are: ')
        print(clfrfr.best_params_)
        y_pred = clfrfr.best_estimator_.predict(X_test)
        yy = clfrfr.best_estimator_.predict(x)
        MSE[1].append(mean_squared_error(y_test, y_pred))
        R_square[1].append(drawTrain(y, yy, 'RFR', i))
        # RFR_MSE.append(mean_squared_error(y_test, y_pred))

        if 'RFR' in Selected_learnerCode:
            print('RFR Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            Learners.append(clfrfr.best_estimator_)

        Learners_map['RFR'] = rfr

        parameters = {'alpha': np.logspace(-2, 2, 5)}
        lasso = Lasso(alpha=0.05, random_state=1, max_iter=1000)
        # drawTrain(lasso, x, y, 'LASSO', i)
        clflasso = GridSearchCV(lasso,
                                parameters,
                                n_jobs=-1,
                                scoring='neg_mean_squared_error')
        clflasso.fit(X_train, y_train)
        yy = clflasso.best_estimator_.predict(x)
        print('The parameters of the best model are: ')
        print(clflasso.best_params_)
        y_pred = clflasso.best_estimator_.predict(X_test)
        R_square[2].append(drawTrain(y, yy, 'LASSO', i))
        MSE[2].append(mean_squared_error(y_test, y_pred))

        if 'LASSO' in Selected_learnerCode:
            print('LASSO  Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            # file.write('LASSO  Mean squared error is ' + str(mean_squared_error(y_test, y_pred)) + "\n")
            Learners.append(clflasso.best_estimator_)

        Learners_map['LASSO'] = lasso

        # drawTrain(ENet, X_train, y_train,X_test,y_test, 'Elastic NET', i)
        parameters = {
            'alpha': np.logspace(-2, 2, 5),
            'l1_ratio': np.linspace(0, 1.0, 11)
        }
        # ENet = ElasticNet(alpha=0.05, l1_ratio=.9, random_state=3)
        # drawTrain(ENet, x, y, 'Elastic NET', i)
        ENet = ElasticNet(alpha=0.05, l1_ratio=.9, random_state=3)
        clfENet = GridSearchCV(ENet,
                               parameters,
                               n_jobs=-1,
                               scoring='neg_mean_squared_error')
        clfENet.fit(X_train, y_train)
        print('The parameters of the best model are: ')
        print(clfENet.best_params_)
        y_pred = clfENet.best_estimator_.predict(X_test)
        yy = clfENet.best_estimator_.predict(x)
        MSE[3].append(mean_squared_error(y_test, y_pred))
        R_square[3].append(drawTrain(y, yy, 'Elastic Net', i))
        if 'ENET' in Selected_learnerCode:
            print('Elastic Net Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            Learners.append(clfENet.best_estimator_)

        Learners_map['ENET'] = ENet

        parameters = {'n_estimators': [100, 500, 1000, 2000, 3000, 5000]}
        GBoost = GradientBoostingRegressor(n_estimators=3000,
                                           learning_rate=0.05,
                                           max_depth=4,
                                           max_features='sqrt',
                                           min_samples_leaf=15,
                                           min_samples_split=10,
                                           loss='huber',
                                           random_state=5)
        clfGBoost = GridSearchCV(GBoost,
                                 parameters,
                                 n_jobs=-1,
                                 scoring='neg_mean_squared_error')
        clfGBoost.fit(X_train, y_train)
        print('The parameters of the best model are: ')
        print(clfGBoost.best_params_)
        y_pred = clfGBoost.best_estimator_.predict(X_test)
        yy = clfGBoost.best_estimator_.predict(x)
        MSE[4].append(mean_squared_error(y_test, y_pred))
        # GBoost_MSE.append(mean_squared_error(y_test, y_pred))
        R_square[4].append(drawTrain(y, yy, 'Gradient Boosting', i))
        if 'GBOOST' in Selected_learnerCode:
            print('GBoost Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            Learners.append(clfGBoost.best_estimator_)

        Learners_map['GBOOST'] = GBoost

        # Adaboost
        # Adaboost = AdaBoostRegressor(base_estimator=SVR(C=1.0, epsilon=0.2))
        Adaboost = AdaBoostRegressor()
        Adaboost.fit(X_train, y_train)
        y_pred = Adaboost.predict(X_test)
        yy = Adaboost.predict(x)
        R_square[5].append(drawTrain(y, yy, 'Adaboost', i))
        print('Adaboost Mean squared error is ' +
              str(mean_squared_error(y_test, y_pred)) + "\n")
        Ada_MSE.append(mean_squared_error(y_test, y_pred))

        # BAGGING
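        # baggingAveragingModels (defined earlier, not shown in this excerpt)
        # is assumed to be a simple prediction-averaging ensemble; a minimal
        # sketch is given after this example.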
        baggingModel = baggingAveragingModels(
            models=(clfsvr.best_estimator_, clfrfr.best_estimator_,
                    clfENet.best_estimator_, clfGBoost.best_estimator_,
                    clflasso.best_estimator_))
        baggingModel.fit(X_train, y_train)
        y_pred = baggingModel.predict(X_test)
        MSE[5].append(mean_squared_error(y_test, y_pred))
        yy = baggingModel.predict(x)
        R_square[6].append(drawTrain(y, yy, 'Bagging', i))
        print('Bagging before selected squared error is ' +
              str(mean_squared_error(y_test, y_pred)) + "\n")

        baggingModel = baggingAveragingModels(models=tuple(Learners))

        baggingModel.fit(X_train, y_train)
        y_pred = baggingModel.predict(X_test)
        MSE[6].append(mean_squared_error(y_test, y_pred))
        yy = baggingModel.predict(x)
        R_square[7].append(drawTrain(y, yy, 'Bagging', i))

        print('Bagging after selected squared error is ' +
              str(mean_squared_error(y_test, y_pred)) + "\n")
        # Initialise once (not on every repetition), otherwise only the last
        # repetition's R^2 values would survive for the CSV export after the
        # loop.
        if 'stacking_R_square' not in locals():
            stacking_R_square = [[], [], [], [], [], []]
        All_learner = ['SVR', 'RFR', 'LASSO', 'ENET', 'GBOOST']
        for k in range(0, len(Selected_learnerCode)):
            """learnerList = []
            for kk in range(0,len(Selected_learnerCode)):
                if Selected_learnerCode[kk]!='' :
                    learnerList.append(Learners_map[Selected_learnerCode[kk]])"""
            """stacked_averaged_models = StackingAveragedModels(base_models=tuple(learnerList),
                                                             meta_model=Learners_map[All_learner[k]])
            drawTrain(stacked_averaged_models, X_train, y_train,X_test,y_test, 'stacking with '+All_learner[k], i)"""
            # stacked_averaged_models = StackingAveragedModels(base_models=tuple(learnerList),
            #                                                 meta_model=Learners_map[All_learner[k]])
            # mlxtend's StackingRegressor exposes the meta-regressor's
            # hyperparameters under the 'meta-<estimator>__<param>' prefix,
            # which is what these grids tune.
            meta_param_grids = {
                'SVR': {
                    'meta-svr__C': np.logspace(-3, 3, 7),
                    'meta-svr__gamma': np.logspace(-3, 3, 7)
                },
                'RFR': {
                    'meta-randomforestregressor__n_estimators':
                    [10, 50, 100, 500, 1000]
                },
                'LASSO': {
                    'meta-lasso__alpha': np.logspace(-2, 2, 5)
                },
                'ENET': {
                    'meta-elasticnet__alpha': np.logspace(-2, 2, 5)
                },
                'GBOOST': {
                    'meta-gradientboostingregressor__n_estimators':
                    [100, 500, 1000, 2000, 3000, 5000]
                },
            }
            params = meta_param_grids[All_learner[k]]
            stacked_averaged_models = StackingRegressor(
                regressors=Learners,
                meta_regressor=Learners_map[All_learner[k]])
            # NB: this search uses GridSearchCV's default R^2 scoring, unlike
            # the neg_mean_squared_error searches above.
            grid = GridSearchCV(estimator=stacked_averaged_models,
                                param_grid=params)
            grid.fit(X_train, y_train)
            y_pred = grid.best_estimator_.predict(X_test)
            yy = grid.best_estimator_.predict(x)
            stacking_R_square[k].append(
                drawTrain(y, yy, 'stacking with ' + All_learner[k], i))
            print('Stacking with meta-model ' + All_learner[k] +
                  ' Mean squared error is ' +
                  str(mean_squared_error(y_test, y_pred)) + "\n")
            stacking_MSE[k].append(mean_squared_error(y_test, y_pred))

        # Stack again, this time with the bagging/averaging ensemble as the
        # meta-regressor.
        stacked_averaged_models = StackingRegressor(
            regressors=Learners, meta_regressor=baggingModel)
        stacked_averaged_models.fit(X_train, y_train)
        y_pred = stacked_averaged_models.predict(X_test)
        yy = stacked_averaged_models.predict(x)
        stacking_R_square[5].append(
            drawTrain(y, yy, 'stacking with bagging', i))
        print('Stacking with meta-model bagging Mean squared error is ' +
              str(mean_squared_error(y_test, y_pred)) + "\n")
        stacking_MSE[5].append(mean_squared_error(y_test, y_pred))

        gc.collect()

    print("Adaboost mean is " + str(np.mean(Ada_MSE)))

    # Per repetition, keep the best (lowest) MSE across the six stacking
    # variants.
    min_stacking_MSE = [
        min(stacking_MSE[j][i] for j in range(6)) for i in range(times)
    ]

    plot_x = np.linspace(1, times, times)
    for idx, color in enumerate(['b', 'r', 'y', 'k', 'g', 'm']):
        if len(MSE[idx]) > 0:
            plt.plot(plot_x, MSE[idx], color)
    if len(MSE[6]) > 0:
        plt.plot(plot_x, MSE[6], color='coral', linestyle=':', marker='|')
    plt.plot(plot_x, min_stacking_MSE, color='cyan')
    plt.xlabel('Repeat times')
    plt.ylabel('MSE')
    plt.legend(
        ('SVR avg = ' + str(np.mean(MSE[0])),
         'RFR avg = ' + str(np.mean(MSE[1])),
         'Lasso avg = ' + str(np.mean(MSE[2])),
         'Enet avg = ' + str(np.mean(MSE[3])),
         'Gboost avg = ' + str(np.mean(MSE[4])),
         'Bagging before avg = ' + str(np.mean(MSE[5])),
         'Bagging after avg = ' + str(np.mean(MSE[6])),
         'St-LIBS avg = ' + str(np.mean(min_stacking_MSE))),
        loc='upper right')
    plt.title('Different learning machines')
    plt.savefig('DifferentLearner.png')
    plt.clf()

    plot_x = np.linspace(1, times, times)
    plt.plot(plot_x, Ada_MSE, 'b')
    plt.plot(plot_x, MSE[6], 'r')
    plt.plot(plot_x, min_stacking_MSE, 'g')
    plt.legend(('Adaboost avg = ' + str(np.mean(Ada_MSE)),
                'Bagging avg = ' + str(np.mean(MSE[6])),
                'St-LIBS avg = ' + str(np.mean(min_stacking_MSE))),
               loc='upper right')
    plt.title('Bagging VS St-LIBS VS Adaboost')
    plt.xlabel('Repeat times')
    plt.ylabel('MSE')
    plt.savefig('Bagging VS St-LIBS&Adaboost.png')
    plt.clf()

    plot_x = np.linspace(1, times, times)
    for idx, color in enumerate(['b', 'r', 'y', 'k', 'g', 'm']):
        if len(stacking_MSE[idx]) > 0:
            plt.plot(plot_x, stacking_MSE[idx], color)
    plt.legend(('SVR avg = ' + str(np.mean(stacking_MSE[0])),
                'RFR avg = ' + str(np.mean(stacking_MSE[1])),
                'Lasso avg = ' + str(np.mean(stacking_MSE[2])),
                'Enet avg = ' + str(np.mean(stacking_MSE[3])),
                'Gboost avg = ' + str(np.mean(stacking_MSE[4])),
                'Bagging avg = ' + str(np.mean(stacking_MSE[5]))),
               loc='upper right')
    plt.title('Different meta-learning machines (Adaboost avg MSE = ' +
              str(np.mean(Ada_MSE)) + ')')
    plt.xlabel('Repeat times')
    plt.ylabel('MSE')
    plt.savefig('DifferentMetaLearner.png')
    plt.clf()
    index = ['SVR', 'RFR', 'LASSO', 'ENET', 'Gboost', 'BAGGING1', 'BAGGING2']
    mse_file = pd.DataFrame(index=index, data=MSE)
    mse_file.to_csv('MSE.csv', encoding='utf-8')

    index = ['SVR', 'RFR', 'LASSO', 'ENET', 'Gboost', 'BAGGING']
    mse_file = pd.DataFrame(index=index, data=stacking_MSE)
    mse_file.to_csv('stacking_MSE.csv', encoding='utf-8')

    mse_file = pd.DataFrame(data=min_stacking_MSE)
    mse_file.to_csv('min_stacking_MSE.csv', encoding='utf-8')

    index = [
        'SVR', 'RFR', 'LASSO', 'ENET', 'Gboost', 'Adaboost', 'BAGGING1',
        'BAGGING2'
    ]
    r_file = pd.DataFrame(index=index, data=R_square)
    r_file.to_csv('R_square.csv', encoding='utf-8')

    index = ['SVR', 'RFR', 'LASSO', 'ENET', 'Gboost', 'BAGGING']
    r_file = pd.DataFrame(index=index, data=stacking_R_square)
    r_file.to_csv('stacking_R_square.csv', encoding='utf-8')
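The helper baggingAveragingModels used above is defined earlier in the full script and is not shown in this excerpt. For reference, here is a minimal sketch of such a prediction-averaging ensemble, assuming it simply fits clones of the base models and averages their predictions (the class name AveragingModels is illustrative):

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin, clone


class AveragingModels(BaseEstimator, RegressorMixin):
    """Average the predictions of several regressors."""

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        # Fit fresh clones so the estimators passed in are left untouched.
        self.models_ = [clone(m) for m in self.models]
        for model in self.models_:
            model.fit(X, y)
        return self

    def predict(self, X):
        # Column-stack the per-model predictions and average row-wise.
        predictions = np.column_stack(
            [model.predict(X) for model in self.models_])
        return np.mean(predictions, axis=1)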
Example no. 46
0
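# Note: both objectives below assume the target is a rate in [0, 1]:
# LightGBM's 'xentropy' (cross-entropy) and XGBoost's 'reg:logistic'
# both require labels in that range.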
gbm_penetration_rate = lgb.LGBMRegressor(
    n_estimators=200,
    subsample_freq=1,
    subsample=0.8,
    colsample_bytree=0.8,
    learning_rate=0.05,
    max_depth=8,
    num_leaves=256,
    objective='xentropy',
    device='gpu',
)

# subsample_freq and num_leaves are LightGBM parameters, not XGBoost's,
# so they are omitted from the XGBRegressor configuration.
xgb_penetration_rate = xgb.XGBRegressor(n_estimators=200,
                                        subsample=0.7,
                                        colsample_bytree=0.7,
                                        learning_rate=0.1,
                                        max_depth=8,
                                        objective='reg:logistic',
                                        n_jobs=-1)

meta_reg = Ridge()

stregr = StackingRegressor(
    regressors=[gbm_penetration_rate, xgb_penetration_rate],
    meta_regressor=meta_reg)

stregr.fit(X_train, y_train[:, 0])
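# StackingRegressor.score returns R^2, so this prints 1 - R^2 on the
# validation set.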
print(1 - stregr.score(X_val, y_val[:, 0]))
Example no. 47
0
                               silent=1,
                               random_state=7,
                               nthread=-1)
gbm_b = GradientBoostingRegressor(learning_rate=0.05,
                                  n_estimators=2000,
                                  max_depth=4,
                                  max_features='log2',
                                  min_samples_leaf=15,
                                  min_samples_split=10,
                                  loss='huber')

stackmodel = StackingRegressor(
    regressors=[ElNet_b, lasso_b, ridge_b, svr_b, model_xgb_b, gbm_b],
    meta_regressor=Lasso(alpha=0.00035))

stackmodel.fit(x_train, y_train)

stacked = stackmodel.predict(x_test)
# In-sample (training-set) RMSE; no test labels are used here.
rmse_stacked = np.sqrt(mean_squared_error(y_train,
                                          stackmodel.predict(x_train)))
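# The target was presumably modelled on a log1p scale upstream, so expm1
# maps the stacked predictions back to the original scale.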
stacked_pred = np.expm1(stacked)

# Averaged model: weighted blend of the separately fitted models
# (weights sum to 1.0), back-transformed with expm1.
ensembled = np.expm1((0.25 * ridge.predict(x_test).reshape(-1, 1)) +
                     (0.2 * ElNet.predict(x_test).reshape(-1, 1)) +
                     (0.2 * lasso.predict(x_test).reshape(-1, 1)) +
                     (0.15 * model_xgb.predict(x_test).reshape(-1, 1)) +
                     (0.2 * GBoost.predict(x_test).reshape(-1, 1)))

# Print the performance of each model
obj = pd.DataFrame([[
Example no. 48
0
plt.ylabel('Accuracy')
plt.show()

# In[368]:

from mlxtend.regressor import StackingRegressor
lr = LinearRegression()
sclf = StackingRegressor(regressors=[grid_search, abr, rfr], meta_regressor=lr)
print('3-fold cross validation:\n')
for clf, label in zip([grid_search, abr, rfr, sclf],
                      ['grid_search', 'abr', 'rfr', 'StackingRegressor']):
    scores = cross_val_score(clf, X, y, cv=3)
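    # For regressors, cross_val_score uses the estimator's default R^2
    # score, so these figures are R^2 values, not classification accuracy.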

    print("Accuracy: %0.2f (+/- %0.2f) [%s]" %
          (scores.mean(), scores.std(), label))
sclf.fit(X_train, y_train)
predictions = sclf.predict(X_test)

# In[370]:

train_sizes, train_score, test_score = learning_curve(
    sclf, X, y, train_sizes=[0.1, 0.2, 0.4, 0.6, 0.8, 1], cv=3)
train_error = 1 - np.mean(train_score, axis=1)
test_error = 1 - np.mean(test_score, axis=1)
# Plotting 1 - error recovers the mean R^2 at each training-set size.
plt.plot(train_sizes, 1 - train_error, 'o-', color='r', label='training')
plt.plot(train_sizes, 1 - test_error, 'o-', color='g', label='testing')
plt.legend(loc='best')
plt.xlabel('training examples')
plt.ylabel('Score (R^2)')
plt.show()