# Fit a GP regression to the solar dataset, plot the fit with a +/- 2 std
# band, then compute a kernel goodness-of-fit statistic on the test set.
data = loadmat("../data/02-solar.mat")
X = data['X']
y = data['y']
X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)

# NOTE(review): converted Python 2 `print` statements to print() calls so this
# chunk runs under the Python 3 syntax used elsewhere in the file.
print("num_train:", len(X_train))
print("num_test:", len(X_test))

# RBF-kernel GP; hyperparameters start at 1 and are optimised by m.optimize().
kernel = RBF(input_dim=1, variance=1., lengthscale=1.)
m = GPRegression(X_train, y_train, kernel)
m.optimize()

pred_mean, pred_std = m.predict(X_test)

# Predictive mean (solid) with +/- 2 std band (dashed); train in blue dots,
# test in red dots.
plt.plot(X_test, pred_mean, 'b-')
plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
plt.plot(X_train, y_train, 'b.', markersize=3)
plt.plot(X_test, y_test, 'r.', markersize=5)
plt.grid(True)
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
plt.show()

# Goodness-of-fit statistic from the GP's predictive gradients on the test set.
s = GaussianQuadraticTest(None)
gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
num_test_samples = 10000
# NOTE(review): this chunk arrived whitespace-mangled and began mid-function.
# The missing header and first statement of get_thinning() were restored from
# the complete, identical copy of the function that appears later in this
# file -- confirm against that copy.
def get_thinning(X, nlags=50):
    """Estimate a thinning factor for chain X.

    Returns the first lag (1-based) whose autocorrelation is closest to 0.5,
    together with the full autocorrelation sequence.
    """
    autocorrelation = acf(X, nlags=nlags, fft=True)
    # find the lag whose correlation is closest to the target value 0.5
    thinning = np.argmin(np.abs(autocorrelation - 0.5)) + 1
    return thinning, autocorrelation


def normal_mild_corr(N):
    """Draw a length-N Metropolis-Hastings chain (standard-normal target);
    step=0.55 yields a mildly autocorrelated chain."""
    X = metropolis_hastings(log_normal, chain_size=N, thinning=1,
                            x_prev=np.random.randn(), step=0.55)
    return X


# Pilot chain -> estimate the thinning factor.
X = normal_mild_corr(TEST_CHAIN_SIZE)
sgld_thinning, autocorr = get_thinning(X, 500)
print('thinning for sgld t-student simulation ', sgld_thinning, autocorr[sgld_thinning])

# Simulate a long chain, thin it, and inspect the residual autocorrelation.
X = normal_mild_corr(sgld_thinning * 100000)
X = X[::sgld_thinning]
r = acf(X, nlags=30)
print(r)

seaborn.set_style("whitegrid")
plt.plot(r)
plt.xlabel('lags')
plt.ylabel('auto correlation')
plt.ylim([0, 1])
plt.tight_layout()
plt.savefig('../write_up/img/sgld_lags.eps')
return DataFrame(data) p_values = to_data_frame(p_values[::5], epsilon[::5]) # likelihood_evaluations = to_data_frame(likelihood_evaluations, epsilon) plt.figure() sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d") plt.ylabel("p values") plt.xlabel("epsilon") plt.tight_layout() plt.savefig('../../write_up/img/Heiko1.pdf') plt.figure() plt.plot(epsilon[::2], np.mean(likelihood_evaluations[::2], axis=1), 'g') plt.ylabel("likelihood evaluations") plt.xlabel("epsilon") plt.tight_layout() plt.savefig('../../write_up/img/Heiko2.pdf') # # f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True) # # sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d", ax=ax1) # ax1.set_ylabel("p values") # ax1.set_xlabel("") # # y2 = likelihood_evaluations # sns.barplot(x=0, y=1, data=likelihood_evaluations, palette="RdBu_d", ax=ax2) # # ax2.set_ylabel("Likelihood evaluations")
# Estimate the thinning size for the SGLD t-student simulation, then verify
# the residual autocorrelation of the thinned chain.
# NOTE(review): in the original, this entire chunk sat on ONE physical line
# starting with '#', which commented the whole thing out; reformatting
# restores the code.

def get_thinning(X, nlags=50):
    """Estimate a thinning factor for chain X.

    Returns the first lag (1-based) whose autocorrelation is closest to 0.5,
    together with the full autocorrelation sequence.
    """
    autocorrelation = acf(X, nlags=nlags, fft=True)
    # find the lag whose correlation is closest to the target value 0.5
    thinning = np.argmin(np.abs(autocorrelation - 0.5)) + 1
    return thinning, autocorrelation


def normal_mild_corr(N):
    """Draw a length-N Metropolis-Hastings chain (standard-normal target);
    step=0.55 yields a mildly autocorrelated chain."""
    X = metropolis_hastings(log_normal, chain_size=N, thinning=1,
                            x_prev=np.random.randn(), step=0.55)
    return X


# Pilot chain -> estimate the thinning factor.
X = normal_mild_corr(TEST_CHAIN_SIZE)
sgld_thinning, autocorr = get_thinning(X, 500)
print('thinning for sgld t-student simulation ', sgld_thinning, autocorr[sgld_thinning])

# Simulate a long chain, thin it, and inspect the residual autocorrelation.
X = normal_mild_corr(sgld_thinning * 100000)
X = X[::sgld_thinning]
r = acf(X, nlags=30)
print(r)

seaborn.set_style("whitegrid")
plt.plot(r)
plt.xlabel('lags')
plt.ylabel('auto correlation')
plt.ylim([0, 1])
plt.tight_layout()
plt.savefig('../write_up/img/sgld_lags.eps')
# GP regression on the normalised solar dataset with previously optimised
# hyperparameters; plot the fit with the x axis mapped back to calendar years.
X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)

# NOTE(review): converted Python 2 `print` statements to print() calls so this
# chunk runs under the Python 3 syntax used elsewhere in the file.
print("num_train:", len(X_train))
print("num_test:", len(X_test))

kernel = RBF(input_dim=1, variance=0.608, lengthscale=0.207)
m = GPRegression(X_train, y_train, kernel, noise_var=0.283)
m.optimize()

pred_mean, pred_std = m.predict(X_test)

# Undo the dataset normalisation so the x axis reads in years.
# NOTE(review): the constants 116.502738394 / 1815.93213296 look like the
# scale/offset applied when the data was normalised -- confirm against the
# preprocessing code.
X_test_plot = X_test[:, 0] * 116.502738394 + 1815.93213296

fig, ax = plt.subplots()
plt.plot(X_test_plot, pred_mean, "r-")
# plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
# plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')

# Shaded +/- 2 std predictive band; take 1-D column slices for fill_between.
lower = (pred_mean - 2 * pred_std)[:, 0]
upper = (pred_mean + 2 * pred_std)[:, 0]
plt.fill_between(X_test_plot, lower, upper, color="r", alpha=0.3)

plt.plot(X_train * 116.502738394 + 1815.93213296, y_train, "b.", markersize=3)
plt.plot(X_test_plot, y_test, "*", color="black", markersize=5)
plt.grid(True)
plt.xlabel(r"Year")
plt.ylabel(r"Solar activity (normalised)")

# One tick every 100 years.
start, end = ax.get_xlim()
ax.xaxis.set_ticks(np.arange(start, end, 100))
data.append([epsilon[i], eval]) return DataFrame(data) p_values = to_data_frame(p_values[::5], epsilon[::5]) # likelihood_evaluations = to_data_frame(likelihood_evaluations, epsilon) plt.figure() sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d") plt.ylabel("p values") plt.xlabel("epsilon") plt.tight_layout() plt.savefig('../../write_up/img/Heiko1.pdf') plt.figure() plt.plot(epsilon[::2],np.mean(likelihood_evaluations[::2],axis=1),'g') plt.ylabel("likelihood evaluations") plt.xlabel("epsilon") plt.tight_layout() plt.savefig('../../write_up/img/Heiko2.pdf') # # f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True) # # sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d", ax=ax1) # ax1.set_ylabel("p values") # ax1.set_xlabel("") # # y2 = likelihood_evaluations # sns.barplot(x=0, y=1, data=likelihood_evaluations, palette="RdBu_d", ax=ax2) # # ax2.set_ylabel("Likelihood evaluations")