# Fit a 1-D GP regression model, plot the predictive fit, then run a
# quadratic-form goodness-of-fit test on the predictive distribution.
# NOTE(review): X_train/y_train/X_test/y_test and the helpers (RBF,
# GPRegression, GaussianQuadraticTest, ...) come from elsewhere in the
# original project — confirm against the full script.
# Fixed: lines 2-30 carried a stray one-space indent (IndentationError).
kernel = RBF(input_dim=1, variance=1., lengthscale=1.)
m = GPRegression(X_train, y_train, kernel)
m.optimize()

res = 100
pred_mean, pred_std = m.predict(X_test)
plt.plot(X_test, pred_mean, 'b-')
# +/- 2 predictive std: roughly a 95% band
plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
plt.plot(X_train, y_train, 'b.', markersize=3)
plt.plot(X_test, y_test, 'r.', markersize=5)
plt.grid(True)
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
plt.show()

# goodness-of-fit statistic of the GP predictive distribution on test data
s = GaussianQuadraticTest(None)
gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])

# bootstrap the null distribution of the statistic and plot it against `stat`
num_test_samples = 10000
null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)

sns.distplot(null_samples, kde=False, norm_hist=True)
plt.plot([stat, stat], [0, .012], 'black')
plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
plt.xlabel(r"$V_n$")
plt.ylabel(r"Frequency")
plt.savefig("gp_regression_bootstrap_hist.eps", bbox_inches='tight')
    # filter out desired entries: start from an all-True mask
    # (self-equality is False only for NaN, so NaN rows of `field` drop out)
    mask = (df[field] == df[field])
    for k, v in conditions.items():
        mask &= (df[k] == v)
    current = df.loc[mask]

    # only use desired values of x_fields; .copy() so the astype assignment
    # below does not trigger a SettingWithCopyWarning on a slice of df
    current = current.loc[[x in x_field_values for x in current[x_field]]].copy()

    # use ints on x-axis
    current[x_field] = current[x_field].astype(int)

    sns.set_style("whitegrid")
    # DataFrame.sort was removed in pandas 0.20; sort_values is the
    # supported replacement
    sns.boxplot(x=x_field, y=field, data=current.sort_values(by=x_field))

    plt.xlabel(field_plot_names[x_field])
    plt.ylabel(field_plot_names[field])

    plt.tight_layout()

    fname_base = os.path.splitext(fname)[0]
    plt.savefig(fname_base + ".png", bbox_inches='tight')
    plt.savefig(fname_base + ".eps", bbox_inches='tight')

    # print info on number of trials
    print(field)
    print("Average number of trials: %d" % int(np.round(current.groupby(x_field).apply(len).mean())))
    print(current.groupby(x_field).apply(len))
    
plt.show()
    # all-True mask (NaN != NaN, so NaN entries of `field` are excluded)
    mask = (df[field] == df[field])
    for k, v in conditions.items():
        mask &= (df[k] == v)
    current = df.loc[mask]

    # only use desired values of x_fields; .copy() so the astype assignment
    # below does not trigger a SettingWithCopyWarning on a slice of df
    current = current.loc[[
        x in x_field_values for x in current[x_field]
    ]].copy()

    # use ints on x-axis
    current[x_field] = current[x_field].astype(int)

    sns.set_style("whitegrid")
    # DataFrame.sort was removed in pandas 0.20; use sort_values instead
    sns.boxplot(x=x_field, y=field, data=current.sort_values(by=x_field))

    plt.xlabel(field_plot_names[x_field])
    plt.ylabel(field_plot_names[field])

    fname_base = os.path.splitext(fname)[0]
    plt.savefig(fname_base + ".png", bbox_inches='tight')
    plt.savefig(fname_base + ".eps", bbox_inches='tight')

    # print info on number of trials
    print(field)
    print("Average number of trials: %d" %
          int(np.round(current.groupby(x_field).apply(len).mean())))
    print(current.groupby(x_field).apply(len))

plt.show()
    return thinning, autocorrelation


def normal_mild_corr(N):
    """Draw a length-``N`` Metropolis-Hastings chain targeting ``log_normal``.

    A step size of 0.55 with no thinning yields a mildly autocorrelated
    chain, which is what the downstream thinning experiments expect.
    """
    chain = metropolis_hastings(
        log_normal,
        chain_size=N,
        thinning=1,
        x_prev=np.random.randn(),
        step=0.55,
    )
    return chain


# Calibrate a thinning factor on a pilot chain, then draw a long chain,
# thin it, and plot the residual autocorrelation.
X = normal_mild_corr(TEST_CHAIN_SIZE)
sgld_thinning, autocorr = get_thinning(X, 500)
print('thinning for sgld t-student simulation ', sgld_thinning,
      autocorr[sgld_thinning])

# regenerate a chain long enough to keep 100000 samples after thinning
X = normal_mild_corr(sgld_thinning * 100000)
X = X[::sgld_thinning]

# autocorrelation of the thinned chain; should decay quickly if thinning worked
r = acf(X, nlags=30)
print(r)

seaborn.set_style("whitegrid")
plt.plot(r)
plt.xlabel('lags')
plt.ylabel('auto correlation')
plt.ylim([0, 1])
plt.tight_layout()
plt.savefig('../write_up/img/sgld_lags.eps')
# Beispiel #5 (extraction artifact — separator between scraped examples)
# 0
    # NOTE(review): fragment — the enclosing `def to_data_frame(...)` header
    # and the `data = []` initialiser lie outside this excerpt (compare the
    # fuller duplicate later in the file); confirm against the original.
    # Flattens arr into (epsilon, value) rows, one per evaluation.
    for i, r in enumerate(arr):
        for eval in r:
            data.append([epsilon[i], eval])
    return DataFrame(data)


# Box plot of p-values per epsilon (every 5th epsilon to keep the axis legible)
p_values = to_data_frame(p_values[::5], epsilon[::5])

# likelihood_evaluations = to_data_frame(likelihood_evaluations, epsilon)

plt.figure()
sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d")
plt.ylabel("p values")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko1.pdf')

# mean number of likelihood evaluations as a function of epsilon
plt.figure()
plt.plot(epsilon[::2], np.mean(likelihood_evaluations[::2], axis=1), 'g')
plt.ylabel("likelihood evaluations")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko2.pdf')
#
# f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)
#
# sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d", ax=ax1)
# ax1.set_ylabel("p values")
# ax1.set_xlabel("")
#
# y2 = likelihood_evaluations
# estimate size of thinning
def get_thinning(X, nlags=50, target=0.5):
    """Estimate a thinning factor for the chain ``X``.

    Computes the autocorrelation function up to ``nlags`` and returns the
    lag whose autocorrelation is closest to ``target`` (plus one, so the
    result is usable directly as a slice step), together with the full
    autocorrelation array.

    ``target`` generalizes the previously hard-coded 0.5; the default keeps
    the original behaviour.
    """
    autocorrelation = acf(X, nlags=nlags, fft=True)
    # find the lag whose correlation is closest to `target`
    thinning = np.argmin(np.abs(autocorrelation - target)) + 1
    return thinning, autocorrelation

def normal_mild_corr(N):
    """Return a length-``N`` MH chain targeting ``log_normal`` (mild autocorrelation)."""
    samples = metropolis_hastings(log_normal,
                                  chain_size=N,
                                  thinning=1,
                                  x_prev=np.random.randn(),
                                  step=0.55)
    return samples


# Calibrate a thinning factor on a pilot chain, then draw a long chain,
# thin it, and plot the residual autocorrelation.
X = normal_mild_corr(TEST_CHAIN_SIZE)
sgld_thinning, autocorr = get_thinning(X,500)
print('thinning for sgld t-student simulation ', sgld_thinning,autocorr[sgld_thinning])


# regenerate a chain long enough to keep 100000 samples after thinning
X = normal_mild_corr(sgld_thinning *100000)
X = X[::sgld_thinning]

# autocorrelation of the thinned chain; should decay quickly if thinning worked
r= acf(X,nlags=30)
print(r)

seaborn.set_style("whitegrid")
plt.plot(r)
plt.xlabel('lags')
plt.ylabel('auto correlation')
plt.ylim([0,1])
plt.tight_layout()
plt.savefig('../write_up/img/sgld_lags.eps')
    #     plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
    #     plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
    # some hacks to make x axis ok again
    lower = (pred_mean - 2 * pred_std)[:, 0]
    upper = (pred_mean + 2 * pred_std)[:, 0]
    # shade the +/- 2 std predictive band
    plt.fill_between(X_test_plot, lower, upper, color="r", alpha=0.3)
    # map the normalised training inputs back to calendar years for plotting
    plt.plot(X_train * 116.502738394 + 1815.93213296, y_train, "b.", markersize=3)
    plt.plot(X_test_plot, y_test, "*", color="black", markersize=5)
    plt.grid(True)
    plt.xlabel(r"Year")
    plt.ylabel(r"Solar activity (normalised)")

    start, end = ax.get_xlim()
    ax.xaxis.set_ticks(np.arange(start, end, 100))

    plt.savefig("gp_regression_data_fit.eps", bbox_inches="tight")
    plt.savefig("gp_regression_data_fit.pdf", bbox_inches="tight")
    # NOTE(review): exit() aborts here, so the test below never runs —
    # presumably a leftover from iterating on the figure; confirm intent.
    exit()

    # goodness-of-fit test of the GP predictive distribution
    s = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])

    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    # fixed: the original used a Python-2 print statement here, which is a
    # SyntaxError in Python 3 (the rest of the file uses print(...))
    print("p-value:", 1.0 - np.mean(null_samples <= stat))

    plt.figure()
    sns.distplot(null_samples, kde=False, norm_hist=True)
    plt.plot([stat, stat], [0, 0.012], "black")
    plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
# Beispiel #8 (extraction artifact — separator between scraped examples)
# 0
from tools.latex_plot_init import plt

# Box plot of p-values per degrees-of-freedom column for the "good" setting.
results = np.load('results_good.npy')
df = DataFrame(results)

plt.figure()

seaborn.set_style("whitegrid")
# column 0 = degrees of freedom, column 1 = p-value
seaborn.boxplot(x=0, y=1, data=df,palette="BuGn_d")

plt.tight_layout()
plt.ylabel('p values')
plt.ylim([0,1])
plt.xlabel('degrees of freedom')
plt.savefig('../write_up/img/sgld_student.pdf')

# Same plot for the "bad" setting.
results = np.load('results_bad.npy')
df = DataFrame(results)

plt.figure()

seaborn.set_style("whitegrid")
seaborn.boxplot(x=0, y=1, data=df,palette="BuGn_d")

plt.tight_layout()
plt.ylabel('p values')
plt.ylim([0,1])
plt.xlabel('degrees of freedom')
plt.savefig('../write_up/img/sgld_student_bad.pdf')
# Beispiel #9 (extraction artifact — separator between scraped examples)
# 0
    # NOTE(review): fragment — the enclosing `def to_data_frame(...)` header
    # lies outside this excerpt; confirm against the original source.
    # Flattens arr into (epsilon, value) rows, one per evaluation.
    data = []
    for i, r in enumerate(arr):
        for eval in r:
            data.append([epsilon[i], eval])
    return DataFrame(data)

# Box plot of p-values per epsilon (every 5th epsilon to keep the axis legible)
p_values = to_data_frame(p_values[::5], epsilon[::5])

# likelihood_evaluations = to_data_frame(likelihood_evaluations, epsilon)

plt.figure()
sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d")
plt.ylabel("p values")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko1.pdf')

# mean number of likelihood evaluations as a function of epsilon
plt.figure()
plt.plot(epsilon[::2],np.mean(likelihood_evaluations[::2],axis=1),'g')
plt.ylabel("likelihood evaluations")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko2.pdf')
#
# f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)
#
# sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d", ax=ax1)
# ax1.set_ylabel("p values")
# ax1.set_xlabel("")
#
# y2 = likelihood_evaluations