def cross_validation_regression(X, Y, M, number_rounds, times_crossvalidation):
    """Run repeated random 80/20 hold-out validation of the BaNK regressor.

    Each repetition draws a fresh random ordering of the samples, fits a new
    ``BaNK_regression.bank_regression`` model on the first 80% and measures
    the absolute prediction error on the remaining 20%.

    Args:
        X: Input samples, indexed along the first axis.
        Y: Targets aligned row-by-row with ``X``.
        M: Forwarded unchanged as the third argument of ``bank_regression``.
        number_rounds: Forwarded unchanged to ``learn_kernel``.
        times_crossvalidation: Number of shuffle/fit/evaluate repetitions.

    Returns:
        ``np.ndarray`` holding one ``[mean_abs_error, std_abs_error]`` row per
        repetition.

    Raises:
        ValueError: If ``X`` and ``Y`` do not have the same number of samples.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    n = len(X)
    if len(Y) != n:
        raise ValueError("X and Y must contain the same number of samples")
    n_train = int(n * 0.8)
    timestamp_format = "%A, %d. %B %Y %I:%M%p"

    scores = []
    for i in range(times_crossvalidation):
        progress = str(i + 1) + " of " + str(times_crossvalidation)
        time_begin = datetime.datetime.now()
        print("Begin " + progress + " at " + time_begin.strftime(timestamp_format))

        # Shuffle X and Y with the SAME permutation so that every sample keeps
        # its target; shuffling X alone would break the pairing. Indexing
        # produces copies, so the caller's arrays are left untouched.
        order = np.random.permutation(n)
        X_shuffled, Y_shuffled = X[order], Y[order]
        X_train, Y_train = X_shuffled[:n_train], Y_shuffled[:n_train]
        X_test, Y_test = X_shuffled[n_train:], Y_shuffled[n_train:]

        bank = BaNK_regression.bank_regression(X_train, Y_train, M)
        bank.learn_kernel(number_rounds)

        time_end = datetime.datetime.now()
        print("End " + progress + " at " + time_end.strftime(timestamp_format))
        print("Training time: " + str(time_end - time_begin))

        Y_predicted = bank.predict_new_X_beta_omega(X_test)
        abs_error = np.abs(Y_test - Y_predicted)
        scores.append([abs_error.mean(), abs_error.std()])
    return np.array(scores)
def mauna_atmospheric(M, swaps):
    """Fit the BaNK regressor to the Mauna Loa CO2 series and plot the result.

    The model is trained on the full series, evaluated on a dense grid that
    extends past the last observation, and drawn together with the data.

    Args:
        M: Forwarded unchanged as the third argument of ``bank_regression``.
        swaps: Forwarded unchanged as the first argument of ``learn_kernel``.
    """
    X, y = load_mauna_loa_atmospheric_co2()
    X = X.reshape(len(X), )

    bank = BaNK_regression.bank_regression(X, y, M)
    bank.learn_kernel(swaps, X, y, "Mauna LOA CO2")

    # 1000 evenly spaced inputs from the first observation to 3 units past
    # the last one, as a column vector.
    X_ = np.linspace(X.min(), X.max() + 3, 1000)[:, np.newaxis]
    y_pred = bank.predict(X_)
    bank.sample_beta_sigma()
    # NOTE(review): element [1] is used directly as the half-width of the
    # shaded band; confirm it is a standard deviation and not a variance.
    y_std = bank.get_beta_sigma()[1]

    # Illustration. Every artist carries a label so that plt.legend() below
    # has entries to show (it previously produced an empty legend + warning).
    plt.scatter(X, y, c='k', label="Observations")
    plt.plot(X_, y_pred, label="Prediction")
    plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std,
                     alpha=0.5, color='k', label="Prediction +/- y_std")
    plt.xlim(X_.min(), X_.max())
    plt.xlabel("Year")
    plt.ylabel(r"CO$_2$ in ppm")
    plt.title(r"Atmospheric CO$_2$ concentration at Mauna Loa")
    plt.tight_layout()
    plt.legend()
    plt.show()
def example_2d_regression():
    """Run a synthetic 2-D regression experiment and plot real vs. learned output.

    Frequencies are drawn with ``samplingGMM`` from a two-component mixture,
    weights from a standard normal, and 2-D inputs from a zero-mean Gaussian.
    A ``bank_regression`` model is trained on the generated data, the mean
    absolute difference between its output and the generating function is
    printed, and both surfaces are shown as 3-D scatter plots.
    """
    means = np.array([[-1, 0], [3. * np.pi / 4, 11. * np.pi / 8]])
    cov = np.array([[[1 / 2., 0], [0, 1 / 3.]],
                    [[1. / 4, 1. / -5], [1. / -5, 1. / 5.3]]])
    realpik = np.array([1. / 2, 1. / 2])
    N = 1000
    M = 250

    real_omegas = samplingGMM(N=M, means=means, cov=cov, pi=realpik)
    real_beta = np.array(
        multivariate_normal.rvs(mean=np.zeros(2 * M), cov=np.identity(2 * M), size=1))

    # numpy.random is the function the deprecated ``scipy.random`` alias
    # pointed to, so this draws the inputs exactly as before.
    Xi = np.random.multivariate_normal([0, 0], [[10, 0], [0, 10]], N)
    Yi = f(Xi, real_omegas, real_beta)

    number_of_rounds = 10
    bank = BaNK_regression.bank_regression(Xi, Yi, M)
    bank.learn_kernel(number_of_rounds)
    bank.sample_beta_sigma()

    real_Phi_X = __matrix_phi(Xi, real_omegas)
    Phi_X = __matrix_phi(Xi, bank.omegas)
    y_real = real_Phi_X.dot(real_beta)
    y_sampled = Phi_X.dot(bank.beta)

    # NOTE: the printed label says "MSE" but the value is the mean ABSOLUTE
    # error; the label is kept unchanged so existing logs stay comparable.
    error = y_sampled - y_real
    print("MSE 2D: " + str(np.abs(error).mean()))

    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the same single 3-D axes on the fresh figure.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(Xi.T[0], Xi.T[1], y_real, linewidth=0.2, antialiased=True,
               label='Real', color='blue')
    ax.scatter(Xi.T[0], Xi.T[1], y_sampled, linewidth=0.2, antialiased=True,
               label='Sampled', color='red')
    plt.show()
def __do_train_test_locally(X, y, M1, M2, swaps, name_):
    """Train ``Bank_Locally_Regression`` on an 80/20 split and print its test MSE.

    Args:
        X: Input samples.
        y: Targets aligned with ``X``.
        M1: Forwarded as the third argument of ``Bank_Locally_Regression``.
        M2: Forwarded as the fourth argument of ``Bank_Locally_Regression``.
        swaps: Forwarded as the first argument of ``learn_kernel``.
        name_: Text used in the printed report; also passed to ``learn_kernel``.
    """
    # TODO: BaNK locally, as well as BaNK classification, still need to be
    # implemented properly.
    features_fit, features_eval, targets_fit, targets_eval = train_test_split(
        X, y, test_size=0.2)

    model = BaNK_regression.Bank_Locally_Regression(
        features_fit, targets_fit, M1, M2)
    # The complete (unsplit) data set is handed to learn_kernel together with
    # the name.
    model.learn_kernel(swaps, X, y, name_)
    predictions = model.predict(features_eval)

    lowest, highest = min(y), max(y)
    test_mse = mean_squared_error(targets_eval, predictions)
    print(" ".join(["MSE:", name_, str(test_mse)]))

    del X, y, features_fit, features_eval, targets_fit, targets_eval
    print("y_min: {!s} y_max: {!s} Range:{!s}".format(
        lowest, highest, highest - lowest))
def __do_train_test(X, y, M, swaps, name_):
    """Train ``bank_regression`` on an 80/20 split, report MSE and save predictions.

    Prints a start banner, the train and test scores returned by
    ``return_mse``, then asks the model to save its predictions for the whole
    data set to a CSV whose name embeds ``name_``, ``swaps`` and the test
    score.

    Args:
        X: Input samples.
        y: Targets aligned with ``X``.
        M: Forwarded as the third argument of ``bank_regression``.
        swaps: Forwarded as the first argument of ``learn_kernel``.
        name_: Data set name used in the printed report and the file name.
    """
    print("Start train ---------------------------" + name_ + "-------------------------Time: " +
          str(time.strftime("%c")) + "----------")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    bank = BaNK_regression.bank_regression(X_train, y_train, M)
    # NOTE(review): the full (unsplit) X, y are passed here alongside the
    # name; confirm learn_kernel does not use them for fitting, otherwise the
    # test score below is contaminated.
    bank.learn_kernel(swaps, X, y, name_)

    MSE_train = bank.return_mse(X_train, y_train)
    print("MSE train: " + name_ + " " + str(MSE_train) +
          " swaps: " + str(swaps) + " M: " + str(M))
    MSE_test = bank.return_mse(X_test, y_test)
    print("MSE test: " + name_ + " " + str(MSE_test) +
          " swaps: " + str(swaps) + " M: " + str(M))

    # The score must be converted with str() before concatenation; joining a
    # numeric score directly raises TypeError.
    bank.save_prediction(
        X, y,
        name_ + "_Swaps_" + str(swaps) + "_score_" + str(MSE_test) + "_stationary.csv")

    # Drop the local references and force a collection before the caller
    # moves on to the next data set.
    del X, y, bank
    gc.collect()
def common_examples(M, swaps):
    """Run the BaNK regressor on the bundled benchmark data sets.

    First the Mauna Loa CO2 series is fitted on all samples and predictions
    on an extended grid are saved. Then California housing, Boston
    house-price (when the installed scikit-learn still provides it) and
    Diabetes are each run through ``__do_train_test``.

    Args:
        M: Forwarded as the third argument of ``bank_regression``.
        swaps: Forwarded as the first argument of ``learn_kernel``.
    """
    X, y = load_mauna_loa_atmospheric_co2()
    X = X.reshape(len(X), )
    name_ = "Mauna LOA CO2"
    print(
        "Start train --------------------------- Mauna LOA CO2 -------------------------Time: " +
        str(time.strftime("%c")) + "----------")
    bank = BaNK_regression.bank_regression(X, y, M)
    bank.learn_kernel(swaps, X, y, name_)
    MSE_train = bank.return_mse(X, y)
    print("MSE train: " + name_ + " " + str(MSE_train) +
          " swaps: " + str(swaps) + " M: " + str(M))

    # Prediction grid: 1000 points from the first observation to 3 units
    # past the last one, as a column vector.
    X_ = np.linspace(X.min(), X.max() + 3, 1000)[:, np.newaxis]
    del X, y
    bank.save_prediction_noTrue(
        X_,
        name_ + "_Swaps_" + str(swaps) + "_MSE_" + str(MSE_train) + "_stationary.csv")
    del bank
    gc.collect()

    dataset = datasets.fetch_california_housing()
    X_full, y_full = dataset.data, dataset.target
    del dataset
    __do_train_test(X_full, y_full, M, swaps, "california_houses")

    # return_X_y is passed by keyword because newer scikit-learn rejects it
    # positionally. Recent releases removed load_boston altogether; in that
    # case skip this data set instead of aborting the remaining benchmark.
    try:
        X, y = datasets.load_boston(return_X_y=True)
    except (AttributeError, ImportError):
        print("Skipping Boston house-price: datasets.load_boston is not available")
    else:
        __do_train_test(X, y, M, swaps, "Boston house-price")
        del X, y

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    __do_train_test(X, y, M, swaps, "Diabetes")
    del X, y
def example_1D_regression():
    """Run a synthetic 1-D regression experiment with diagnostic plots.

    Frequencies are drawn with ``samplingGMM_1d`` from a three-component
    mixture and weights from a standard normal; targets are generated on an
    even grid over [-100, 100]. A ``bank_regression`` model is trained on an
    80% split, after which the function

    * plots the generating mean,
    * plots the learned against the real kernel on [-25, 25],
    * prints mean/std absolute errors for the feature matrices and for the
      train and test predictions,
    * plots real and sampled mean curves with dashed +/-3 bands.
    """
    means = np.array([0, 3 * np.pi / 4, 11 * np.pi / 8])
    cov = np.array([1. / 4, 1. / 4, 1. / 4**2])
    realpik = np.array([1. / 3, 1. / 3, 1. / 3])
    N = 8000
    M = 250
    lower, upper = -100, 100
    real_sigma = 1  # width unit of the dashed band drawn around the real mean

    def report(label, abs_err):
        # Print "<label>: <mean> +- <std>" for an array of absolute errors.
        print(label + ": " + str(abs_err.mean()) + " +- " + str(abs_err.std()))

    # --- synthetic data -------------------------------------------------
    real_omegas = samplingGMM_1d(N=M, means=means, cov=np.sqrt(cov), pi=realpik)
    real_beta = np.array(
        multivariate_normal.rvs(mean=np.zeros(2 * M), cov=np.identity(2 * M), size=1))
    grid = np.linspace(lower, upper, N).reshape(N, 1)
    targets = f(grid, real_omegas, real_beta, True)
    X_train, X_test, Y_train, Y_test = train_test_split(grid, targets, test_size=0.2)

    bank = BaNK_regression.bank_regression(X_train, Y_train, M)

    # --- generating mean, shown before training -------------------------
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, grid)
    plt.scatter(grid, real_Phi_X.dot(real_beta), label='Real mean', color='black')
    plt.legend()
    plt.show()

    number_of_rounds = 5
    bank.learn_kernel(number_of_rounds)

    # --- learned vs. real kernel ----------------------------------------
    kernel_grid = np.linspace(-25, 25, N)
    kernel_learned = printKernel(
        kernel_grid, bank.means, bank.get_covariance_matrix(), bank.get_pik())
    kernel_real = printKernel(kernel_grid, means, cov, realpik)
    plt.plot(kernel_grid, kernel_learned, label='Kernel learned')
    plt.plot(kernel_grid, kernel_real, label='Kernel real')
    plt.legend()
    plt.show()

    # --- numeric error reports ------------------------------------------
    kernel_column = kernel_grid.reshape(len(kernel_grid), 1)
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, kernel_column)
    Phi_X = bank.matrix_phi_with_X(bank.omegas, kernel_column)
    report("Error Phi_X", np.abs(real_Phi_X - Phi_X))

    train_pred = bank.predict_new_X_beta_omega(X_train)
    report("Error Yi_train", np.abs(Y_train - train_pred))

    # The test prediction is taken BEFORE beta/sigma are re-sampled.
    test_pred = bank.predict_new_X_beta_omega(X_test)
    bank.sample_beta_sigma()
    sigma_e = bank.sigma_e
    report("Error Yi_test", np.abs(Y_test - test_pred))

    # --- real vs. sampled mean with dashed bands ------------------------
    grid = np.linspace(lower, upper, N).reshape(N, 1)
    real_Phi_X = bank.matrix_phi_with_X(real_omegas, grid)
    Phi_X = bank.matrix_phi_with_X(bank.omegas, grid)
    real_mean = real_Phi_X.dot(real_beta)
    sampled_mean = Phi_X.dot(bank.beta)
    sampled_band = 3 * np.sqrt(sigma_e)

    plt.plot(grid, real_mean, label='Real mean', color='black')
    plt.plot(grid, real_mean + 3 * real_sigma, '--', color='black', label='Real variance')
    plt.plot(grid, real_mean - 3 * real_sigma, '--', color='black')
    plt.plot(grid, sampled_mean, label='Sample mean', color='red')
    plt.plot(grid, sampled_mean + sampled_band, '--', color='red', label='Sampled variance')
    plt.plot(grid, sampled_mean - sampled_band, '--', color='red')
    plt.legend()
    plt.show()