예제 #1
0
파일: sin.py 프로젝트: vishalbelsare/bartpy
def run(size=100, alpha=0.95, beta=2.0, n_trees=50):
    """Fit a bartpy ``SklearnModel`` to a noisy sine curve and score it on a hold-out set.

    The data are ``size`` evenly spaced points on [0, 5] with
    ``y = sin(x) + N(0, 0.1)`` noise.  A third of the points are held out,
    the model is fitted on the rest, predicted-vs-actual values are shown
    as a scatter plot and the hold-out RMSE is printed.

    Args:
        size: Number of points to generate.
        alpha: Forwarded to ``SklearnModel(alpha=...)``.
        beta: Forwarded to ``SklearnModel(beta=...)``.
        n_trees: Forwarded to ``SklearnModel(n_trees=...)``.

    Returns:
        None.  The RMSE is printed, not returned.
    """
    import warnings

    # Escalate UserWarning to an exception so that warnings raised while
    # fitting abort the run.  NOTE: this changes the process-wide warning
    # filter and is not undone when the function returns.
    warnings.simplefilter("error", UserWarning)
    x = np.linspace(0, 5, size)
    X = pd.DataFrame(x)
    y = np.random.normal(0, 0.1, size=size) + np.sin(x)

    model = SklearnModel(n_samples=100,
                         n_burn=50,
                         n_trees=n_trees,
                         alpha=alpha,
                         beta=beta,
                         n_jobs=1,
                         n_chains=1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42,
                                                        shuffle=True)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    plt.scatter(y_test, y_pred)
    plt.show()

    # Root *mean* squared error: averaging (rather than summing) the squared
    # errors keeps the score independent of the number of test points.
    rmse = np.sqrt(np.mean(np.square(y_test - y_pred)))
    print(rmse)
예제 #2
0
def run(alpha, beta, n_trees, size=100):
    """Fit a bartpy ``SklearnModel`` to a noisy sine curve and show diagnostics.

    Generates ``size`` points of ``sin(x) + N(0, 0.1)`` on [0, 5], fits the
    model on all of them, plots the target against the in-sample
    prediction and then runs the tree-depth, feature-split and QQ plots.

    Args:
        alpha: Forwarded to ``SklearnModel(alpha=...)``.
        beta: Forwarded to ``SklearnModel(beta=...)``.
        n_trees: Forwarded to ``SklearnModel(n_trees=...)``.
        size: Number of points to generate.

    Returns:
        Tuple ``(model, x, y)``: the fitted model, the 1-D input grid and
        the noisy targets.
    """
    import warnings

    # Any UserWarning becomes an exception from here on (process-wide).
    warnings.simplefilter("error", UserWarning)

    x = np.linspace(0, 5, size)
    noise = np.random.normal(0, 0.1, size=size)
    y = noise + np.sin(x)
    X = pd.DataFrame(x)

    hyperparameters = dict(
        n_samples=500,
        n_burn=100,
        n_trees=n_trees,
        alpha=alpha,
        beta=beta,
        n_jobs=1,
        n_chains=1,
    )
    model = SklearnModel(**hyperparameters)
    model.fit(X, y)

    # Target as stored by the model vs. the model's in-sample prediction.
    plt.plot(model.data.unnormalized_y)
    plt.plot(model.predict())
    plt.show()

    for diagnostic in (plot_tree_depth, plot_feature_split_proportions, plot_qq):
        diagnostic(model)

    # NOTE: the null feature-split distribution check
    # (null_feature_split_proportions_distribution(model, X, y)) is disabled.
    return model, x, y
예제 #3
0
파일: sin.py 프로젝트: vishalbelsare/bartpy
def run(alpha, beta, n_trees, size=100):
    """Fit a bartpy ``SklearnModel`` with a custom tree sampler to a noisy sine.

    Generates ``size`` points of ``sin(x) + N(0, 1.0)`` on [0, 5], fits the
    model using the sampler returned by ``get_tree_sampler(0.5, 0.5)`` and
    plots the targets together with the prediction on the training inputs.

    Args:
        alpha: Forwarded to ``SklearnModel(alpha=...)``.
        beta: Forwarded to ``SklearnModel(beta=...)``.
        n_trees: Forwarded to ``SklearnModel(n_trees=...)``.
        size: Number of points to generate.

    Returns:
        Tuple ``(model, x, y)``: the fitted model, the 1-D input grid and
        the noisy targets.
    """
    import warnings

    # Any UserWarning becomes an exception from here on (process-wide).
    warnings.simplefilter("error", UserWarning)

    x = np.linspace(0, 5, size)
    noise = np.random.normal(0, 1.0, size=size)
    y = noise + np.sin(x)
    X = pd.DataFrame(x)

    from bartpy.samplers.unconstrainedtree.treemutation import get_tree_sampler

    sampler = get_tree_sampler(0.5, 0.5)
    model = SklearnModel(
        n_samples=50,
        n_burn=50,
        n_trees=n_trees,
        alpha=alpha,
        beta=beta,
        n_jobs=1,
        n_chains=1,
        tree_sampler=sampler,
    )
    model.fit(X, y)

    plt.plot(y)
    plt.plot(model.predict(X))
    plt.show()

    # NOTE: the tree-depth, feature-split, QQ and null-distribution
    # diagnostics are disabled for this example.
    return model, x, y
예제 #4
0
def plot_homoskedasity_diagnostics(model: SklearnModel, ax=None):
    """Plot fitted values against residuals for the model's training data.

    Draws a seaborn regression plot of ``model.predict(...)`` (x axis)
    against ``model.residuals(...)`` (y axis), both evaluated on
    ``model.data.X.values``.

    Args:
        model: Fitted model exposing ``predict``, ``residuals`` and
            ``data.X.values``.
        ax: Matplotlib axes to draw on.  When ``None`` a new 5x5 inch
            figure with a single axes is created.

    Returns:
        The axes the plot was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(5, 5))
    covariates = model.data.X.values
    # Pass x/y by keyword: recent seaborn releases make them keyword-only,
    # and the keyword form is accepted by older releases as well.
    sns.regplot(x=model.predict(covariates),
                y=model.residuals(covariates),
                ax=ax)
    ax.set_title("Fitted Values V Residuals")
    ax.set_xlabel("Fitted Value")
    ax.set_ylabel("Residual")
    return ax
예제 #5
0
def run(alpha, beta, n_trees, size=3000):
    """Fit a bartpy ``SklearnModel`` to a dense noisy sine curve and plot diagnostics.

    Generates ``size`` points of ``sin(x) + N(0, 0.1)`` on [0, 5], fits the
    model on all of them, plots the target against the prediction on the
    training inputs and then runs the tree-depth, feature-split and QQ
    plots.

    Args:
        alpha: Forwarded to ``SklearnModel(alpha=...)``.
        beta: Forwarded to ``SklearnModel(beta=...)``.
        n_trees: Forwarded to ``SklearnModel(n_trees=...)``.
        size: Number of points to generate.  Defaults to 3000, the value
            that was previously hard-coded for both the grid and the noise.

    Returns:
        Tuple ``(model, x, y)``: the fitted model, the 1-D input grid and
        the noisy targets.
    """
    # A single ``size`` drives both the grid and the noise vector so the
    # two can never get out of sync.
    x = np.linspace(0, 5, size)
    X = pd.DataFrame(x)
    y = np.random.normal(0, 0.1, size=size) + np.sin(x)

    model = SklearnModel(n_samples=50, n_burn=50, n_trees=n_trees, alpha=alpha, beta=beta)
    model.fit(X, y)
    plt.plot(model.data.unnormalized_y)
    plt.plot(model.predict(X))
    plt.show()
    # These two diagnostics take the sampled models rather than the
    # estimator itself.
    plot_tree_depth(model.model_samples)
    plot_feature_split_proportions(model.model_samples)
    plot_qq(model)
    # NOTE: the null feature-split distribution check
    # (null_feature_split_proportions_distribution(model, X, y)) is disabled.
    return model, x, y
예제 #6
0
파일: ols.py 프로젝트: stjordanis/bartpy
def run(alpha, beta, n_trees, n_regressors):
    """Fit a bartpy ``SklearnModel`` to synthetic linear-regression data.

    Draws 10000 rows of standard-normal regressors, overwrites column 1 of
    the first 5000 rows with the constant 4, and builds the target as a
    linear combination with coefficients drawn from U(-2, 2) plus
    N(0, 0.1) noise.  The in-sample prediction is shown as a scatter plot
    against the target.

    Args:
        alpha: Forwarded to ``SklearnModel(alpha=...)``.
        beta: Forwarded to ``SklearnModel(beta=...)``.
        n_trees: Forwarded to ``SklearnModel(n_trees=...)``.
        n_regressors: Number of regressor columns.  Column index 1 is
            written to, so at least two columns are needed.

    Returns:
        Tuple ``(model, x, y)``: the fitted model, the regressor array and
        the target vector.
    """
    n_rows = 10000
    n_constant_rows = 5000

    b_true = np.random.uniform(-2, 2, size=n_regressors)

    flat_draws = np.random.normal(0, 1, size=n_rows * n_regressors)
    x = flat_draws.reshape(n_rows, n_regressors)
    x[:n_constant_rows, 1] = 4
    X = pd.DataFrame(x)

    noise = np.random.normal(0, 0.1, size=n_rows)
    linear_signal = np.array(X.multiply(b_true, axis=1).sum(axis=1))
    y = noise + linear_signal

    hyperparameters = dict(
        n_samples=200,
        n_burn=50,
        n_trees=n_trees,
        alpha=alpha,
        beta=beta,
    )
    model = SklearnModel(**hyperparameters)
    model.fit(X, y)

    predictions = model.predict()
    plt.scatter(y, predictions)
    plt.show()
    return model, x, y
예제 #7
0
 def predict(self, X: np.ndarray = None) -> np.ndarray:
     """Return the base estimator's prediction plus the BART prediction.

     Args:
         X: Covariates to predict for.  When ``None``, ``self.data.X`` is
             used instead.

     Returns:
         Sum of ``self.base_estimator.predict(...)`` and
         ``SklearnModel.predict(self, ...)`` on the same covariates.
     """
     features = self.data.X if X is None else X
     # SklearnModel.predict is invoked explicitly on this instance
     # (presumably the parent-class implementation — confirm against the
     # class header).
     return self.base_estimator.predict(features) + SklearnModel.predict(self, features)
예제 #8
0
#gs_xgb = GridSearchCV(estimator=pipe_bart,
#                     param_grid=params_bart,
#                    cv=loo)
# Fit grid search
#gs_xgb.fit(X_train, y_train.ravel())
# Best params
#print('Best params: %s' % gs_xgb.best_params_)
# Best training data accuracy
#print('Best training score: %.3f' % gs_xgb.best_score_)
# Predict on test data with best params
import itertools

# Manual grid search over the BART hyperparameters.  Every value drawn from
# the grid is passed to the model, so each fit is a distinct configuration.
# itertools.product iterates with the right-most list varying fastest, i.e.
# in the same order as the equivalent nested ``for`` loops.
for n_chains, n_trees, n_burn, n_samples, sigma_b, sigma_a in itertools.product(
        [3, 4, 5],
        [25, 50, 100],
        [100, 200, 300],
        [100, 50, 200],
        [0.0001, 0.01, 0.001],
        [0.0001, 0.01, 0.001]):
    gs_xgb = SklearnModel(sigma_a=sigma_a,
                          sigma_b=sigma_b,
                          n_samples=n_samples,
                          n_burn=n_burn,
                          n_trees=n_trees,
                          n_chains=n_chains).fit(X_train, y_train)
    y_pred = gs_xgb.predict(X_test)
    # The cutoff only thresholds the predictions, so both cutoffs are
    # scored against a single fit instead of refitting the model per cutoff.
    for cutoff in [0.1, 0.5]:
        # Test data accuracy of model with best params
        #print('Test set score score for best params: %.3f ' % mean_squared_error(y_test, y_pred))
        print('Test set score score for best params: %.3f ' %
              f1_score(y_test, y_pred > cutoff))
        # n_samples is printed as a plain number (it used to be wrapped in
        # a set literal, which rendered as e.g. ``{100}``).
        print("n samples", n_samples, "\n b ", sigma_b,
              "\na ", sigma_a, "\ncutoff ", cutoff)
        print("n chains ", n_chains, "\nn trees ", n_trees,
              "\nn burn ", n_burn)
예제 #9
0
def plot_residuals(model: SklearnModel):
    """Plot the difference between the stored target and the model's prediction.

    Args:
        model: Fitted model exposing ``data.unnormalized_y`` and a
            ``predict()`` that can be called without arguments.
    """
    residuals = model.data.unnormalized_y - model.predict()
    plt.plot(residuals)
    plt.show()
예제 #10
0
def plot_modelled_against_actual(model: SklearnModel):
    """Overlay the stored target and the model's prediction on one figure.

    Args:
        model: Fitted model exposing ``data.unnormalized_y`` and a
            ``predict()`` that can be called without arguments.
    """
    observed = model.data.unnormalized_y
    plt.plot(observed)
    fitted = model.predict()
    plt.plot(fitted)
    plt.show()
예제 #11
0
파일: BART.py 프로젝트: kiminh/BV-NICE
])

#----------------------------------------------------------------
#
#     MODELS
#
#----------------------------------------------------------------

# Two separate models are fitted, one on (X0, Y0) and one on (X1, Y1), and
# their predictions are differenced to obtain tau_hat.
# NOTE(review): presumably X0/Y0 are the control group and X1/Y1 the treated
# group, making tau_hat a treatment-effect estimate — confirm against the
# data-loading code above.
n_trees = 100  # original author's note: the library default is 200 trees — TODO confirm

model0 = SklearnModel(n_trees=n_trees)  # all other hyperparameters left at their defaults
model0.fit(X0, Y0)  # Fit the model
model1 = SklearnModel(n_trees=n_trees)  # all other hyperparameters left at their defaults
model1.fit(X1, Y1)  # Fit the model

# Difference of the two models' predictions on X.
tau_hat = model1.predict(X) - model0.predict(X)
# tau_hat_val = model1.predict(X_val) - model0.predict(X_val)
# tau_hat_test = model1.predict(X_test) - model0.predict(X_test)

# Score the estimate against Tau with eval_pehe (defined elsewhere;
# presumably lower is better — confirm).
pehe_ = eval_pehe(tau_hat, Tau)

# Same estimate on the validation and test covariates.
tau_hat_val = model1.predict(X_val) - model0.predict(X_val)
tau_hat_test = model1.predict(X_test) - model0.predict(X_test)

pehe_val = eval_pehe(tau_hat_val, Tau_val)
pehe_test = eval_pehe(tau_hat_test, Tau_test)

# Report the three scores in the order: X, validation, test.
print(pehe_)
print(pehe_val)
print(pehe_test)
예제 #12
0
 def predict(self, X: np.ndarray = None):
     """Return the fitted stat model's prediction plus the BART prediction.

     Args:
         X: Covariates to predict for.  When ``None``, ``self.data.X`` is
             used instead.

     Returns:
         Sum of ``self.stat_model_fit.predict(...)`` and
         ``SklearnModel.predict(self, ...)`` on the same covariates.
     """
     if X is not None:
         covariates = X
     else:
         covariates = self.data.X
     parametric_part = self.stat_model_fit.predict(covariates)
     # SklearnModel.predict is invoked explicitly on this instance
     # (presumably the parent-class implementation — confirm against the
     # class header).
     bart_part = SklearnModel.predict(self, covariates)
     return parametric_part + bart_part