def train(params=(10**-6, 10**-6, 10**-6, 10**-6)):
    """Fit one Bayesian ridge model per product and store it by product id.

    Reads the module-level ``data_products`` container (indexed by product
    id, each entry offering ``dropna()``) and writes every fitted estimator
    into the module-level ``model_products`` container under the same id.

    Args:
        params: Four values unpacked as ``(alpha_1, alpha_2, lambda_1,
            lambda_2)`` and forwarded to ``BayesianRidge.set_params``.

    Returns:
        None. Results are published through ``model_products``.

    A product whose data has no rows left after ``dropna()`` is skipped; it
    does not stop the remaining products from being trained.
    """
    alpha1, alpha2, lambda1, lambda2 = params
    feature_columns = [
        'amount_of_all_competitors', 'average_price_on_market',
        'distance_to_cheapest_competitor', 'price_rank', 'quality_rank'
    ]

    for product_id in data_products:
        data = data_products[product_id].dropna()
        if len(data.index) == 0:
            # Nothing to learn from for this product; move on to the next
            # one instead of aborting the whole training run.
            continue
        X = data[feature_columns]
        y = data['sold'].copy()
        # Every 'sold' value above 1 is capped at 1 before fitting.
        y[y > 1] = 1

        # NOTE(review): `n_iter` and `normalize` are only accepted by older
        # scikit-learn releases - confirm the pinned version before upgrading.
        model = BayesianRidge(n_iter=1000, tol=0.0001, normalize=True)
        model.set_params(alpha_1=alpha1,
                         alpha_2=alpha2,
                         lambda_1=lambda1,
                         lambda_2=lambda2)

        model.fit(X, y)

        model_products[product_id] = model
Beispiel #2
0
def bayes_regr(X, Y):
    """Fit a Bayesian ridge regression on ``(X, Y)`` and return its weights.

    The estimator is created with ``compute_score=True`` and is then given
    ``alpha_1=10`` and ``lambda_1=1e-3`` before fitting.

    Args:
        X: Training inputs passed straight to ``fit``.
        Y: Training targets passed straight to ``fit``.

    Returns:
        The fitted estimator's ``coef_`` attribute.
    """
    model = BayesianRidge(compute_score=True)
    model.set_params(alpha_1=10, lambda_1=1e-3)
    # fit() hands back the estimator itself, so the weights can be read directly.
    return model.fit(X, Y).coef_
def plot_bayesian_ridge_curvefit():
    """Plot cubic Bayesian ridge fits of a noisy sine for two initialisations.

    Draws two side-by-side panels. The first fit leaves the estimator's
    initial alpha/lambda untouched; the second sets them to ``1.0`` and
    ``1e-3``. Each panel shows the true curve, the observations, the
    predictive mean with a one-standard-deviation band, and a text box with
    the fitted ``alpha_``, ``lambda_`` and last entry of ``scores_``.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    def func(x):
        return np.sin(2 * np.pi * x)

    # Noisy sinusoidal observations (fixed seed) and a dense evaluation grid.
    n_samples = 25
    rng = np.random.RandomState(1234)
    x_obs = rng.uniform(0., 1., n_samples)
    y_obs = func(x_obs) + rng.normal(scale=0.1, size=n_samples)
    x_grid = np.linspace(0., 1., 100)

    # Cubic polynomial features: columns x**0 .. x**3.
    degree = 3
    n_columns = degree + 1
    design_obs = np.vander(x_obs, n_columns, increasing=True)
    design_grid = np.vander(x_grid, n_columns, increasing=True)

    reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))

    # One (alpha_init, lambda_init) pair per panel; the first pair is only
    # used for the title because the estimator is left at its defaults.
    init_pairs = (
        [1 / np.var(y_obs), 1.],
        [1., 1e-3],
    )
    for i, (ax, init) in enumerate(zip(axes, init_pairs)):
        is_default_panel = i == 0
        if not is_default_panel:
            reg.set_params(alpha_init=init[0], lambda_init=init[1])
        reg.fit(design_obs, y_obs)
        pred_mean, pred_std = reg.predict(design_grid, return_std=True)

        ax.plot(x_grid, func(x_grid), color="blue", label="sin($2\\pi x$)")
        ax.scatter(x_obs, y_obs, s=50, alpha=0.5, label="observation")
        ax.plot(x_grid, pred_mean, color="red", label="predict mean")
        lower_band = pred_mean - pred_std
        upper_band = pred_mean + pred_std
        ax.fill_between(x_grid, lower_band, upper_band,
                        color="pink", alpha=0.5, label="predict std")
        ax.set_ylim(-1.3, 1.3)
        ax.legend()

        title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
            init[0], init[1])
        if is_default_panel:
            title += " (Default)"
        ax.set_title(title, fontsize=12)

        summary = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
            reg.alpha_, reg.lambda_, reg.scores_[-1])
        ax.text(0.05, -1.0, summary, fontsize=12)

    plt.tight_layout()
    plt.show()
class BayesianLinearRegression:
    """Small wrapper around an intercept-free ``BayesianRidge`` estimator.

    The wrapped estimator lives in ``self.clf`` and is configured with the
    initial alpha/lambda values given at construction time.
    """

    def __init__(self, alphaInit=1., lambdaInit=0.2):
        """Store the initial values and build the configured estimator."""
        self.alphaInit = alphaInit
        self.lambdaInit = lambdaInit
        # set_params() returns the estimator, so it can be chained here.
        self.clf = BayesianRidge(fit_intercept=False).set_params(
            alpha_init=alphaInit,
            lambda_init=lambdaInit)

    def fit(self, X_train, y_train):
        """Fit the wrapped estimator in place; returns nothing."""
        self.clf.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the estimator's ``predict(..., return_std=True)`` result.

        ``X_test`` is converted with ``astype(np.float32)`` before prediction.
        """
        features = X_test.astype(np.float32)
        return self.clf.predict(features, return_std=True)
Beispiel #5
0
def find_best_bayesian_ridge(X_train, y_train):
    """Grid-search a Bayesian ridge's initial alpha/lambda and refit the winner.

    A ``GridSearchCV`` is run over a fixed grid of ``alpha_init`` and
    ``lambda_init`` values. The selected combination is then applied to the
    estimator, which is fitted on the full training data.

    Args:
        X_train: Training inputs passed to the search and to the final fit.
        y_train: Training targets passed to the search and to the final fit.

    Returns:
        A ``(reg, params)`` tuple. ``reg`` is the fitted ``BayesianRidge``.
        ``params`` is the search object's ``get_params()`` dictionary, with
        its ``estimator__alpha_init`` and ``estimator__lambda_init`` entries
        set to the values the search selected.
    """
    reg = BayesianRidge(compute_score=True, tol=1e-5)
    parameters = {
        'alpha_init': (0.2, 0.5, 1, 1.5),
        'lambda_init': [1e-3, 1e-4, 1e-5, 1e-6]
    }
    srch = GridSearchCV(reg, parameters)
    srch.fit(X_train, y_train)

    # get_params() only echoes the search's own configuration, so its
    # estimator__* entries are the untouched values of the base estimator.
    # The winning grid point is exposed through best_params_ instead.
    best = srch.best_params_
    params = srch.get_params()
    params["estimator__alpha_init"] = best["alpha_init"]
    params["estimator__lambda_init"] = best["lambda_init"]

    reg.set_params(alpha_init=params["estimator__alpha_init"],
                   lambda_init=params["estimator__lambda_init"])
    reg.fit(X_train, y_train)

    return reg, params
Beispiel #6
0
 def trainModels(self, dates:np.array, scaler:MinMaxScaler):
     """Fit a Bayesian ridge and an RBF SVR on scaled dates vs. scaled tendency.

     The dates are transformed with ``scaler`` and ``self.tendency`` with
     ``self.data_scaler``. 20% of the rows are held out, and both inputs are
     expanded into ``self.n + 1`` increasing-power Vandermonde columns.

     Returns:
         ``(bayRidge, svr_rbf, scores)`` where ``scores`` is an array holding
         each model's ``.score`` on the held-out rows, in that order.
     """
     # Scale both sides, then split into train and held-out parts.
     scaled_dates = scaler.transform(dates.reshape(-1, 1))
     scaled_target = self.data_scaler.transform(self.tendency.values.reshape(-1 , 1))
     x_train, x_test, y_train, y_test = train_test_split(
         scaled_dates, scaled_target, test_size=0.2)

     # Polynomial expansion of the (flattened) scaled dates.
     n_columns = self.n + 1
     x_train = np.vander(x_train.reshape(-1), n_columns, increasing=True)
     x_test = np.vander(x_test.reshape(-1), n_columns, increasing=True)

     # Both estimators expect one-dimensional targets.
     y_fit = y_train.reshape(-1)
     y_eval = y_test.reshape(-1)

     # Bayesian ridge with explicit initial alpha/lambda.
     bayRidge = BayesianRidge(tol=1e-6, compute_score=True)
     bayRidge.set_params(alpha_init=1, lambda_init=0.001)
     bayRidge.fit(x_train, y_fit)
     bayScore = bayRidge.score(x_test, y_eval)

     # Support vector regression with an RBF kernel.
     svr_rbf = SVR(kernel='rbf', C=1, gamma=0.25)
     svr_rbf.fit(x_train, y_fit)
     svr_score = svr_rbf.score(x_test, y_eval)

     scores = np.array([bayScore, svr_score])
     return bayRidge, svr_rbf, scores
def cross_validate(params):
    """Return the negated mean 3-fold R^2 of a Bayesian ridge on test product 1.

    Reads entry ``1`` of the module-level ``test_data_products`` container,
    drops rows with missing values, and scores an unfitted ``BayesianRidge``
    with ``cross_val_score(cv=3, scoring='r2')``.

    Args:
        params: Four values unpacked as ``(alpha_1, alpha_2, lambda_1,
            lambda_2)`` and forwarded to ``BayesianRidge.set_params``.

    Returns:
        ``0`` when no rows remain after ``dropna()``; otherwise the mean of
        the fold scores with its sign flipped.
    """
    alpha1, alpha2, lambda1, lambda2 = params

    frame = test_data_products[1].dropna()
    if not len(frame.index):
        return 0

    feature_columns = [
        'amount_of_all_competitors', 'average_price_on_market',
        'distance_to_cheapest_competitor', 'price_rank', 'quality_rank'
    ]
    X = frame[feature_columns]
    target = frame['sold'].copy()
    # Every 'sold' value above 1 is capped at 1.
    target[target > 1] = 1

    hyper_priors = {
        'alpha_1': alpha1,
        'alpha_2': alpha2,
        'lambda_1': lambda1,
        'lambda_2': lambda2,
    }
    model = BayesianRidge(n_iter=1000, tol=0.0001, normalize=True)
    model.set_params(**hyper_priors)

    fold_scores = cross_val_score(model, X, target, cv=3, scoring='r2')
    return -np.mean(fold_scores)
# Polynomial (Vandermonde) features for the evaluation points, with columns
# in increasing power, and an intercept-free estimator that records scores.
X_test = np.vander(x_test, n_order + 1, increasing=True)
reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)

# %%
# Plot the true and predicted curves with log marginal likelihood (L)
# -------------------------------------------------------------------
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.0]  # Default values
    elif i == 1:
        init = [1.0, 1e-3]
        # Only the second panel overrides the estimator's initial values.
        reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    # return_std=True yields the predictive standard deviation as well.
    ymean, ystd = reg.predict(X_test, return_std=True)

    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predict mean")
    # Shaded band of one standard deviation around the predictive mean.
    ax.fill_between(x_test,
                    ymean - ystd,
                    ymean + ystd,
                    color="pink",
                    alpha=0.5,
                    label="predict std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    # NOTE(review): the statement below is cut off in this excerpt; its
    # format arguments and closing parenthesis are missing.
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
Beispiel #9
0
        break


# Notebook-style shape check; the resulting tuple is discarded.
X_train.shape, X_test_scaled.shape

y_mlp = best_model.predict(X_test_scaled)

# Tune the initial alpha/lambda of a Bayesian ridge with a grid search.
reg = BayesianRidge(compute_score=True, tol=1e-5)

parameters = {'alpha_init':(0.2, 0.5, 1, 1.5), 'lambda_init':[1e-3, 1e-4, 1e-5,1e-6]}
srch = GridSearchCV(reg, parameters)

srch.fit(X_train, y_train)

params = srch.get_params()
# get_params() only echoes the search's own configuration, so its
# estimator__* entries are the base estimator's untouched values. The
# combination selected by the search is exposed through best_params_.
reg.set_params(**srch.best_params_)
reg.fit(X_train, y_train)
ymean, ystd = reg.predict(X_test, return_std=True)

# exit()

# Persist the model, the scaler, the metrics and the feature table together.
folder = "./models/seirhcd/{}".format(current_dataset_date)
os.makedirs(folder, exist_ok=True)

joblib.dump(best_model, '{}/mlp.save'.format(folder))
joblib.dump(scaler, "{}/scaler.save".format(folder))

with open('{}/metrics.json'.format(folder), 'w') as fp:
    # NOTE(review): `.values` implies ystd is a pandas object here, which
    # presumably requires X_test to be a DataFrame - confirm.
    json.dump({"perf":reports,"std_test":list(ystd.values), "columns":columns, "countries":list(all_countries)}, fp)

merged.to_csv('{}/features.csv'.format(folder))
# Fit by cubic polynomial
# (np.vander with increasing=True gives the columns x**0 .. x**n_order)
n_order = 3
X_train = np.vander(x_train, n_order + 1, increasing=True)
X_test = np.vander(x_test, n_order + 1, increasing=True)

# #############################################################################
# Plot the true and predicted curves with log marginal likelihood (L)
# The estimator is intercept-free and records scores (compute_score=True).
reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
        # Only the second panel overrides the estimator's initial values.
        reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    # return_std=True yields the predictive standard deviation as well.
    ymean, ystd = reg.predict(X_test, return_std=True)

    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predict mean")
    # Shaded band of one standard deviation around the predictive mean.
    ax.fill_between(x_test, ymean-ystd, ymean+ystd,
                    color="pink", alpha=0.5, label="predict std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    # The panel title reports the initial values used for this fit.
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
            init[0], init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)
Beispiel #11
0
def bayes_regression(params):
    """Build an unfitted ``BayesianRidge`` configured from a parameter mapping.

    Args:
        params: Mapping of parameter names to values, expanded as keyword
            arguments into ``set_params``.

    Returns:
        The configured estimator; it has not been fitted.
    """
    # set_params() returns the estimator itself, so it can be returned directly.
    return BayesianRidge().set_params(**params)