コード例 #1
0
def cross_validation_regression(X, Y, M, number_rounds, times_crossvalidation):
    """Evaluate a BaNK regression model on repeated random 80/20 splits.

    For each repetition the samples are shuffled, the first 80% are used to
    build and train ``BaNK_regression.bank_regression`` and the remaining 20%
    are used to measure the absolute prediction error.

    Args:
        X: Indexable collection of inputs (first axis = samples).
        Y: Targets aligned with ``X`` along the first axis.
        M: Forwarded unchanged to ``bank_regression`` as its third argument.
        number_rounds: Forwarded to ``bank.learn_kernel``.
        times_crossvalidation: Number of shuffle/train/test repetitions.

    Returns:
        ``np.ndarray`` with one ``[mean_abs_error, std_abs_error]`` row per
        repetition (an empty array when ``times_crossvalidation`` is 0).
    """
    scores = []
    # Work on array views/copies so fancy indexing is available and the
    # caller's data is never reordered in place.
    X = np.asarray(X)
    Y = np.asarray(Y)
    n = len(X)
    n_train = int(n * 0.8)

    for i in range(times_crossvalidation):
        time_begin = datetime.datetime.now()
        print("Begin " + str(i + 1) + " of " + str(times_crossvalidation) +
              " at " + str(time_begin.strftime("%A, %d. %B %Y %I:%M%p")))

        # Apply ONE permutation to both X and Y. Shuffling X alone would pair
        # every input with the target of a different sample.
        order = np.random.permutation(n)
        X_shuffled, Y_shuffled = X[order], Y[order]
        X_train, Y_train = X_shuffled[:n_train], Y_shuffled[:n_train]
        X_test, Y_test = X_shuffled[n_train:], Y_shuffled[n_train:]

        bank = BaNK_regression.bank_regression(X_train, Y_train, M)
        bank.learn_kernel(number_rounds)
        time_end = datetime.datetime.now()
        print("End " + str(i + 1) + " of " + str(times_crossvalidation) +
              " at " + str(time_end.strftime("%A, %d. %B %Y %I:%M%p")))
        print("Training time: " + str(time_end - time_begin))

        Y_predicted = bank.predict_new_X_beta_omega(X_test)
        abs_error = np.abs(Y_test - Y_predicted)
        scores.append([abs_error.mean(), abs_error.std()])
    return np.array(scores)
コード例 #2
0
def mauna_atmospheric(M, swaps):
    """Fit a BaNK regression to the Mauna Loa CO2 data and plot the result.

    The data comes from ``load_mauna_loa_atmospheric_co2()``. The model is
    evaluated on 1000 evenly spaced points from ``X.min()`` to
    ``X.max() + 3`` and shown together with the observations.

    Args:
        M: Forwarded unchanged to ``BaNK_regression.bank_regression``.
        swaps: Forwarded as the first argument of ``bank.learn_kernel``.
    """
    X, y = load_mauna_loa_atmospheric_co2()
    X = X.reshape(len(X), )  # flatten to a 1-D vector
    bank = BaNK_regression.bank_regression(X, y, M)
    bank.learn_kernel(swaps, X, y, "Mauna LOA CO2")

    # Column vector of evaluation points, extending 3 units past the data.
    X_ = np.linspace(X.min(), X.max() + 3, 1000)[:, np.newaxis]
    y_pred = bank.predict(X_)
    bank.sample_beta_sigma()
    # Second element of get_beta_sigma() is used as the band half-width.
    # NOTE(review): presumably the noise scale -- confirm against the model.
    y_std = bank.get_beta_sigma()[1]

    # Illustration. The artists carry labels so that plt.legend() below has
    # entries to show instead of producing an empty legend and a warning.
    plt.scatter(X, y, c='k', label="Observations")
    plt.plot(X_, y_pred, label="Prediction")
    plt.fill_between(X_[:, 0],
                     y_pred - y_std,
                     y_pred + y_std,
                     alpha=0.5,
                     color='k')
    plt.xlim(X_.min(), X_.max())
    plt.xlabel("Year")
    plt.ylabel(r"CO$_2$ in ppm")
    plt.title(r"Atmospheric CO$_2$ concentration at Mauna Loa")
    plt.tight_layout()
    plt.legend()
    plt.show()
コード例 #3
0
def example_2d_regression():
    """Run a synthetic 2-D regression demo and show real vs. learned surface.

    Steps:
      1. Draw ``M`` frequencies with ``samplingGMM`` from a two-component
         mixture and a weight vector of length ``2 * M`` from a standard
         normal.
      2. Draw ``N`` 2-D inputs from a zero-mean Gaussian with covariance
         ``10 * I`` and compute targets with ``f``.
      3. Train ``BaNK_regression.bank_regression`` for 10 rounds and sample
         its ``beta``/``sigma``.
      4. Print the mean absolute difference between the learned and the real
         noiseless responses and show both in a 3-D scatter plot.
    """
    means = np.array([[-1, 0], [3. * np.pi / 4, 11. * np.pi / 8]])
    cov = np.array([[[1 / 2., 0], [0, 1 / 3.]],
                    [[1. / 4, 1. / -5], [1. / -5, 1. / 5.3]]])
    realpik = np.array([1. / 2, 1. / 2])
    N = 1000
    M = 250
    real_omegas = samplingGMM(N=M, means=means, cov=cov, pi=realpik)
    real_beta = np.array(
        multivariate_normal.rvs(mean=np.zeros(2 * M),
                                cov=np.identity(2 * M),
                                size=1))
    # np.random is used directly: the `sc.random` alias used previously
    # (presumably SciPy's re-export of numpy.random) is gone from current
    # SciPy releases.
    Xi = np.random.multivariate_normal([0, 0], [[10, 0], [0, 10]], N)
    Yi = f(Xi, real_omegas, real_beta)

    number_of_rounds = 10
    bank = BaNK_regression.bank_regression(Xi, Yi, M)
    bank.learn_kernel(number_of_rounds)
    bank.sample_beta_sigma()

    real_Phi_X = __matrix_phi(Xi, real_omegas)
    Phi_X = __matrix_phi(Xi, bank.omegas)
    y_real = real_Phi_X.dot(real_beta)
    y_sampled = Phi_X.dot(bank.beta)
    error = y_sampled - y_real
    # NOTE: the label says "MSE" but the value is a mean ABSOLUTE error; the
    # text is kept unchanged so existing logs stay comparable.
    print("MSE 2D: " + str(np.abs(error).mean()))

    fig = plt.figure()
    # Figure.gca(projection=...) was removed in Matplotlib 3.6; add_subplot
    # creates the 3-D axes on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(Xi.T[0],
               Xi.T[1],
               y_real,
               linewidth=0.2,
               antialiased=True,
               label='Real',
               color='blue')
    ax.scatter(Xi.T[0],
               Xi.T[1],
               y_sampled,
               linewidth=0.2,
               antialiased=True,
               label='Sampled',
               color='red')
    plt.show()
コード例 #4
0
def __do_train_test_locally(X, y, M1, M2, swaps, name_):
    """Train ``Bank_Locally_Regression`` on an 80/20 split and print metrics.

    Prints the test-set mean squared error followed by the minimum, maximum
    and range of the full target vector ``y``.

    Args:
        X: Input samples.
        y: Targets aligned with ``X``.
        M1: Forwarded to ``Bank_Locally_Regression`` (third argument).
        M2: Forwarded to ``Bank_Locally_Regression`` (fourth argument).
        swaps: Forwarded as the first argument of ``learn_kernel``.
        name_: Dataset name, used in the printed report and forwarded to
            ``learn_kernel``.
    """
    # TODO: BaNK locally and BaNK classification still need to be
    # implemented properly.
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.2)
    model = BaNK_regression.Bank_Locally_Regression(train_X, train_y, M1, M2)
    # learn_kernel receives the complete (unsplit) data set.
    model.learn_kernel(swaps, X, y, name_)
    predictions = model.predict(test_X)

    lowest = min(y)
    highest = max(y)

    mse_text = str(mean_squared_error(test_y, predictions))
    print("MSE: " + name_ + " " + mse_text)

    # Drop the references to the data before the final report.
    del X, y, train_X, test_X, train_y, test_y

    range_text = str(highest - lowest)
    print("y_min: " + str(lowest) + " y_max: " + str(highest) + " Range:" +
          range_text)
コード例 #5
0
def __do_train_test(X, y, M, swaps, name_):
    """Train a BaNK regression on an 80/20 split, report MSE, save output.

    Prints a start banner with the current time, the train MSE and the test
    MSE (both from ``bank.return_mse``), then calls ``bank.save_prediction``
    on the full data set with a file name that embeds ``name_``, ``swaps``
    and the test MSE.

    Args:
        X: Input samples.
        y: Targets aligned with ``X``.
        M: Forwarded unchanged to ``BaNK_regression.bank_regression``.
        swaps: Forwarded as the first argument of ``learn_kernel``.
        name_: Dataset name used in log lines and in the output file name.
    """
    print("Start train ---------------------------" + name_ +
          "-------------------------Time: " + str(time.strftime("%c")) +
          "----------")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    bank = BaNK_regression.bank_regression(X_train, y_train, M)
    # learn_kernel receives the complete (unsplit) data set.
    bank.learn_kernel(swaps, X, y, name_)

    MSE_train = bank.return_mse(X_train, y_train)
    print("MSE train: " + name_ + " " + str(MSE_train) + " swaps: " +
          str(swaps) + " M: " + str(M))

    MSE_test = bank.return_mse(X_test, y_test)
    print("MSE test: " + name_ + " " + str(MSE_test) + " swaps: " +
          str(swaps) + " M: " + str(M))

    # str() is required: concatenating the numeric score directly to a
    # string raises TypeError.
    bank.save_prediction(
        X, y, name_ + "_Swaps_" + str(swaps) + "_score_" + str(MSE_test) +
        "_stationary.csv")
    # Release the data and the model before the caller loads the next set.
    del X, y, bank
    gc.collect()
コード例 #6
0
def common_examples(M, swaps):
    """Run the BaNK regression benchmark on several standard data sets.

    Order of execution:
      1. Mauna Loa CO2: train on the full series, print the train MSE and
         save predictions for 1000 points spanning ``X.min()`` to
         ``X.max() + 3`` via ``save_prediction_noTrue``.
      2. California housing, Boston house prices and Diabetes: each is
         handed to ``__do_train_test``.

    Args:
        M: Forwarded unchanged to the model constructor / ``__do_train_test``.
        swaps: Forwarded to ``learn_kernel`` / ``__do_train_test``.
    """
    X, y = load_mauna_loa_atmospheric_co2()
    X = X.reshape(len(X), )  # flatten to a 1-D vector
    name_ = "Mauna LOA CO2"
    print(
        "Start train --------------------------- Mauna LOA CO2 -------------------------Time: "
        + str(time.strftime("%c")) + "----------")
    bank = BaNK_regression.bank_regression(X, y, M)
    bank.learn_kernel(swaps, X, y, name_)

    MSE_train = bank.return_mse(X, y)

    print("MSE train: " + name_ + " " + str(MSE_train) + " swaps: " +
          str(swaps) + " M: " + str(M))

    X_ = np.linspace(X.min(), X.max() + 3, 1000)[:, np.newaxis]
    del X, y
    bank.save_prediction_noTrue(
        X_, name_ + "_Swaps_" + str(swaps) + "_MSE_" + str(MSE_train) +
        "_stationary.csv")
    # Free the model before loading the next data set.
    del bank
    gc.collect()

    dataset = datasets.fetch_california_housing()
    X_full, y_full = dataset.data, dataset.target
    del dataset
    __do_train_test(X_full, y_full, M, swaps, "california_houses")

    # return_X_y is keyword-only in current scikit-learn, and load_boston
    # itself was removed in scikit-learn 1.2. Skip this data set when the
    # loader is unavailable so the remaining benchmark still runs.
    try:
        X, y = datasets.load_boston(return_X_y=True)
    except (AttributeError, ImportError) as exc:
        print("Skipping Boston house-price: " + str(exc))
    else:
        __do_train_test(X, y, M, swaps, "Boston house-price")
        del X, y

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    __do_train_test(X, y, M, swaps, "Diabetes")
    del X, y
コード例 #7
0
def example_1D_regression():
    """Run a synthetic 1-D regression demo and plot the results.

    The function takes no arguments and returns nothing; it prints error
    statistics and opens three matplotlib windows in sequence:

      1. The noiseless real response ``real_Phi_X.dot(real_beta)`` on the
         full input grid.
      2. The learned kernel against the real kernel, both evaluated with
         ``printKernel`` on ``[-25, 25]``.
      3. Real and learned mean responses on the full grid, each with a
         dashed band at +/- 3 units of scale.

    The statement order matters: the random draws and the calls that update
    ``bank`` (``learn_kernel``, ``sample_beta_sigma``) happen in sequence.
    """
    # Parameters of the three-component 1-D mixture used as ground truth.
    means = np.array([0, 3 * np.pi / 4, 11 * np.pi / 8])
    cov = np.array([1. / 4, 1. / 4, 1. / 4**2])
    realpik = np.array([1. / 3, 1. / 3, 1. / 3])

    # means = np.array([0, 3./4 * np.pi ])
    # cov = np.array([1. / 2**2, 1. / 2**2])
    # realpik = np.array([1. / 2, 1. / 2])
    N = 8000  # number of input points
    M = 250  # number of sampled frequencies; real_beta has 2 * M entries
    inicio, fin = -100, 100  # start and end of the input grid

    # The sampler is given sqrt(cov).
    # NOTE(review): presumably it expects standard deviations -- confirm.
    real_omegas = samplingGMM_1d(N=M,
                                 means=means,
                                 cov=np.sqrt(cov),
                                 pi=realpik)
    # Ground-truth weights drawn from a standard normal of dimension 2 * M.
    real_beta = np.array(
        multivariate_normal.rvs(mean=np.zeros(2 * M),
                                cov=np.identity(2 * M),
                                size=1))
    # real_beta = np.append(1, real_beta)
    Xi = np.linspace(inicio, fin, N)
    Xi = Xi.reshape(N, 1)  # column vector: one sample per row
    Yi = f(Xi, real_omegas, real_beta, True)
    X_train, X_test, Y_train, Y_test = train_test_split(Xi, Yi, test_size=0.2)
    bank = BaNK_regression.bank_regression(X_train, Y_train, M)

    # Plot 1: real noiseless response on the whole grid (before training).
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, Xi)
    plt.scatter(Xi,
                real_Phi_X.dot(real_beta),
                label='Real mean',
                color='black')
    # plt.scatter(Xi, real_Phi_X.dot(real_beta) + 3 * 1, color='black', label='Real variance')
    # plt.scatter(Xi, real_Phi_X.dot(real_beta) - 3 * 1, color='black')
    plt.legend()
    plt.show()

    number_of_rounds = 5

    bank.learn_kernel(number_of_rounds)
    # Xi is reused from here on as a 1-D grid on [-25, 25] for the kernel plot.
    Xi = np.linspace(-25, 25, N)

    Yi_learned = printKernel(Xi, bank.means, bank.get_covariance_matrix(),
                             bank.get_pik())

    Yi_real = printKernel(Xi, means, cov, realpik)

    # Plot 2: learned kernel vs. real kernel.
    plt.plot(Xi, Yi_learned, label='Kernel learned')
    plt.plot(Xi, Yi_real, label='Kernel real')
    plt.legend()
    plt.show()
    Xi = Xi.reshape(len(Xi), 1)
    # Element-wise difference between the feature matrices built from the
    # real and the learned frequencies on the [-25, 25] grid.
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, Xi)
    Phi_X = bank.matrix_phi_with_X(bank.omegas, Xi)
    error = np.abs(real_Phi_X - Phi_X)
    print("Error Phi_X: " + str(error.mean()) + " +- " + str(error.std()))
    # Absolute prediction error on the training split.
    Yi_pred = bank.predict_new_X_beta_omega(X_train)
    error = np.abs(Y_train - Yi_pred)
    print("Error Yi_train: " + str(error.mean()) + " +- " + str(error.std()))
    # error = Phi_X.dot(bank.beta) - real_Phi_X.dot(real_beta)
    # print("MSE 1D: " + str(np.abs(error).mean()))
    # plt.scatter(Xi, Yi, label='Real Yi')
    # plt.scatter(Xi, Yi_predicted, label='Predicted Yi')
    # Test predictions are taken BEFORE sample_beta_sigma() is called; the
    # sigma_e read afterwards is only used for the band in the final plot.
    Yi_pred = bank.predict_new_X_beta_omega(X_test)
    bank.sample_beta_sigma()
    sigma_e = bank.sigma_e
    error = np.abs(Y_test - Yi_pred)
    print("Error Yi_test: " + str(error.mean()) + " +- " + str(error.std()))
    # Back to the full [inicio, fin] grid for the final comparison plot.
    Xi = np.linspace(inicio, fin, N)
    Xi = Xi.reshape(N, 1)
    # Phi_X.dot(self.beta)
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, Xi)
    Phi_X = bank.matrix_phi_with_X(bank.omegas, Xi)
    # Plot 3: real mean with a fixed +/- 3 * 1 band.
    # NOTE(review): the factor 1 presumably stands for a unit noise standard
    # deviation in the generator -- confirm against f().
    plt.plot(Xi, real_Phi_X.dot(real_beta), label='Real mean', color='black')
    plt.plot(Xi,
             real_Phi_X.dot(real_beta) + 3 * 1,
             '--',
             color='black',
             label='Real variance')
    plt.plot(Xi, real_Phi_X.dot(real_beta) - 3 * 1, '--', color='black')
    # Learned mean with a +/- 3 * sqrt(sigma_e) band.
    plt.plot(Xi, Phi_X.dot(bank.beta), label='Sample mean', color='red')
    plt.plot(Xi,
             Phi_X.dot(bank.beta) + 3 * np.sqrt(sigma_e),
             '--',
             color='red',
             label='Sampled variance')
    plt.plot(Xi,
             Phi_X.dot(bank.beta) - 3 * np.sqrt(sigma_e),
             '--',
             color='red')
    plt.legend()
    plt.show()