def test_two_dimensional_tests_agrees(self):
    """The dedicated two-dimensional statistic must agree with the
    generic multi-dimensional implementation on the same sample.
    """
    np.random.seed(43)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    sample = np.random.randn(10, 2)
    # 'get_statisitc_two_dim' is the (misspelled) public API name.
    u_specialised, _ = tester.get_statisitc_two_dim(10, sample, 1)
    u_generic, _ = tester.get_statistic_multiple_dim(sample, 1)
    np.testing.assert_almost_equal(u_specialised, u_generic)
def test_k_multiple_equals_k_no_dim(self):
    """A single-column matrix fed to the multi-dimensional kernel must
    reproduce the plain one-dimensional kernel matrix.
    """
    n = 10
    data = np.random.randn(n, 1)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    kernel_multi = tester.k_multiple_dim(data)
    kernel_vector = tester.k_multiple(data[:, 0])
    np.testing.assert_almost_equal(kernel_multi, kernel_vector)
def test_regression_2(self):
    """Regression check: data with inflated variance (sd = 2) must be
    firmly rejected — the bootstrap p-value comes out exactly zero.
    """
    np.random.seed(42)
    inflated = 2.0 * np.random.randn(100)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    u_stat, _ = tester.get_statistic_multiple(inflated)
    assert tester.compute_pvalue(u_stat) == 0.0
def test_two_dimensional_tests_alt(self):
    """A mean-shifted 2-d Gaussian (the alternative) must be rejected
    with a p-value of exactly zero.
    """
    np.random.seed(43)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    shifted = 1 + np.random.randn(100, 2)
    u_stat, _ = tester.get_statisitc_two_dim(100, shifted, 1)
    assert tester.compute_pvalue(u_stat) == 0
def test_gk_multiple_dim(self):
    """Gradient-kernel consistency: the multi-dimensional routine applied
    to a single column must agree with the 1-d implementation.
    """
    n = 10
    data = np.random.randn(n, 1)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    kernel = tester.k_multiple_dim(data)
    grad_multi = tester.gk_multiple_dim(data, kernel, 0)
    grad_vector = tester.gk_multiple(data[:, 0])
    np.testing.assert_almost_equal(grad_multi, grad_vector)
def test_get_statistic_multiple_equals_get_statistic(self):
    """The vectorised statistic must coincide with the pairwise
    reference implementation ('get_statisitc' is the API's spelling).
    """
    n = 10
    data = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    u_fast, stat_fast = tester.get_statistic_multiple(data)
    u_ref, stat_ref = tester.get_statisitc(n, data)
    assert_allclose(stat_ref, stat_fast)
    assert_allclose(u_fast, u_ref)
def test_k_multiple_equals_k_no_grad_multiple_given(self):
    """Every entry of the vectorised kernel matrix equals the pairwise
    k() evaluation when no vectorised gradient callback is supplied.
    """
    n = 10
    data = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    kernel = tester.k_multiple(data)
    for row in range(n):
        for col in range(n):
            expected = tester.k(data[row], data[col])
            assert_almost_equal(kernel[row, col], expected)
def test_gk_multiple_equals_gk(self):
    """Every entry of the vectorised gradient-kernel matrix equals the
    pairwise gk() evaluation.
    """
    n = 10
    data = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    grad_kernel = tester.gk_multiple(data)
    for row in range(n):
        for col in range(n):
            expected = tester.gk(data[row], data[col])
            assert_almost_equal(grad_kernel[row, col], expected)
def test_corr(self):
    """Correlated Gaussian data tested against the matching correlated
    score function: p-values are reproducible under the fixed seed.
    """
    np.random.seed(43)
    sigma = np.array([[1, 0.5], [0.5, 1]])

    def grad_log_correleted(x):
        # Score of N(0, sigma): -(sigma^{-T} + sigma^{-1}) x / 2.
        sigma_inv = np.linalg.inv(sigma)
        return -np.dot(sigma_inv.T + sigma_inv, x) / 2.0

    tester = GaussianQuadraticTest(grad_log_correleted)
    multi = QuadraticMultiple(tester)
    draws = np.random.multivariate_normal([0, 0], sigma, 200)
    reject, p_val = multi.is_from_null(0.05, draws, 0.1)
    np.testing.assert_almost_equal([0.465, 0.465], p_val)
def test_k_multiple_equals_k_grad_multiple_given(self):
    """Same pairwise-vs-vectorised kernel check as above, but with an
    explicit vectorised gradient callback passed to the constructor.
    """

    # NOTE(review): the extra 'self' parameter mirrors the original
    # code; presumably the callback is invoked bound-method-style by
    # the test object -- confirm against GaussianQuadraticTest.
    def fun(self, X):
        return -X

    n = 10
    data = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal,
                                   grad_log_prob_multiple=fun)
    kernel = tester.k_multiple(data)
    for row in range(n):
        for col in range(n):
            expected = tester.k(data[row], data[col])
            assert_almost_equal(kernel[row, col], expected)
def run_simulation(sample_size, bootstrap_size=600, average_over=400):
    """Estimate, over a range of dimensions, the rejection rate at level
    0.1 of three goodness-of-fit tests on data whose first coordinate is
    shifted by uniform noise (the alternative hypothesis).

    Args:
        sample_size: number of points per simulated data set.
        bootstrap_size: null samples drawn for the Baringhaus reference.
        average_over: Monte-Carlo repetitions per dimension.
    """
    for d in [2, 5, 10, 15, 20, 25]:
        # Simulate the null distribution of the Baringhaus statistic
        # once per dimension.
        null_stats = np.array(
            [baringhaus_stat(np.random.randn(sample_size, d))
             for _ in range(bootstrap_size)])

        pvals_baringhaus = []
        pvals_stein = []
        pvals_imq = []
        for _ in range(average_over):
            X = np.random.randn(sample_size, d)
            X[:, 0] += np.random.rand(sample_size)  # perturb first coordinate

            # Baringhaus p-value against the simulated null.
            T = baringhaus_stat(X)
            pvals_baringhaus.append(
                float(len(null_stats[null_stats > T])) / bootstrap_size)

            # Gaussian-kernel Stein test.
            stein_test = GaussianQuadraticTest(grad_log_normal)
            stein_qm = QuadraticMultiple2(stein_test)
            pvals_stein.append(stein_qm.is_from_null(0.1, np.copy(X), 0.5))

            # Inverse-multiquadric-kernel Stein test.
            imq_test = MultiquadricQuadraticTest(grad_log_normal, beta=-0.5)
            imq_qm = QuadraticMultiple2(imq_test)
            pvals_imq.append(imq_qm.is_from_null(0.1, np.copy(X), 0.5))

        print('d :', d)
        pvals_baringhaus = np.array(pvals_baringhaus)
        print(
            'baringhaus :',
            float(len(pvals_baringhaus[pvals_baringhaus < 0.1])) / average_over)
        pvals_stein = np.array(pvals_stein)
        print('Stein :', float(len(pvals_stein[pvals_stein < 0.1])) / average_over)
        pvals_imq = np.array(pvals_imq)
        print('IMQ :', float(len(pvals_imq[pvals_imq < 0.1])) / average_over)
# --- GP regression: fit, then plot predictive mean +/- 2 std over the data ---
m.optimize()
res = 100  # NOTE(review): appears unused in this fragment -- confirm
pred_mean, pred_std = m.predict(X_test)
plt.plot(X_test, pred_mean, 'b-')
plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
plt.plot(X_train, y_train, 'b.', markersize=3)
plt.plot(X_test, y_test, 'r.', markersize=5)
plt.grid(True)
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
plt.show()

# --- Stein goodness-of-fit test of the GP predictive distribution ---
# Gradients of the predictive log-density are supplied explicitly, so the
# test object needs no log-density callback of its own.
s = GaussianQuadraticTest(None)
gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])

# Bootstrap the null distribution of the statistic and plot it against
# the observed value of V_n.
num_test_samples = 10000
null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
sns.distplot(null_samples, kde=False, norm_hist=True)
plt.plot([stat, stat], [0, .012], 'black')
plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
plt.xlabel(r"$V_n$")
plt.ylabel(r"Frequency")
plt.savefig("gp_regression_bootstrap_hist.eps", bbox_inches='tight')
plt.show()
# NOTE(review): 'grad_log_pob' is referenced here before the def below;
# this only works if a same-named function exists earlier in the file --
# confirm.
arr = []
me = GaussianSteinTest(grad_log_pob, 1)
# NOTE(review): block structure reconstructed from a collapsed source
# line; the per-iteration test below is assumed to sit inside this loop
# (it uses 'chain_at_time') -- confirm against the original file.
for time in times_we_look_at:
    # One cross-section of all chains at a fixed time index.
    chain_at_time = samples[:, time]
    # print(time)
    # pval = me.compute_pvalue(chain_at_time)
    # arr.append(pval)

    def grad_log_pob(t):
        # Gradient of the log posterior: summed per-datum gradients
        # plus the prior gradient.
        a = np.sum(manual_grad(t[0], t[1], X), axis=0) + grad_log_prior(t)
        return a

    P_CHANGE = 0.1

    me = GaussianQuadraticTest(grad_log_pob)
    qm = QuadraticMultiple(me)
    reject, p = qm.is_from_null(0.05, chain_at_time, 0.1)
    print(reject)

# import matplotlib.pyplot as plt
# # print(arr)
# # plt.plot(arr)
# # plt.show()
# Power study on (approximate) Student-t data: heavy-tailed alternatives
# (small df) versus a near-Gaussian control (df = 100), recording the
# process-corrected p-value of each Monte-Carlo repetition.
dfs = range(1, 4, 2)  # df = 1 and 3
mc_reps = 100
res = np.empty((0, 2))  # rows: (df, p-value)

# Wild-bootstrap change probability derived from block length N / log(N).
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)

for df in dfs:
    for mc in range(mc_reps):
        print(mc)
        # Oversample the chain, then thin by 10 to reduce autocorrelation.
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)
        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))

# Control runs: df = 100 is effectively Gaussian; tag them with infinity.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # np.inf: the np.Inf alias was removed in NumPy 2.0.
    res = np.vstack((res, np.array([np.inf, pval])))

np.save('results.npy', res)
# Power study: sample from increasingly non-Gaussian targets (parameter c)
# with Metropolis-Hastings and record p-values of two quadratic-time tests.
arr = np.empty((0, 2))   # rows: (c, min per-dimension p-value)
arr2 = np.empty((0, 2))  # rows: (c, pooled p-value)
for c in [1.0, 1.3, 2.0, 3.0]:
    print('c', c)
    log_normal = logg(c)
    for i in range(23):
        print(i)
        x = metropolis_hastings(log_normal, chain_size=500, thinning=15,
                                x_prev=np.random.randn(2))
        me = GaussianQuadraticTest(grad_log_dens)
        qm = QuadraticMultiple(me)
        qm2 = QuadraticMultiple2(me)
        accept_null, p_val = qm.is_from_null(0.05, x, 0.1)
        p_val2 = qm2.is_from_null(0.05, x, 0.1)
        print(p_val2)
        # Bonferroni-style summary keeps the smallest per-dim p-value.
        arr = np.vstack((arr, np.array([c, min(p_val)])))
        arr2 = np.vstack((arr2, np.array([c, p_val2])))

df = DataFrame(arr)
pr = seaborn.boxplot(x=0, y=1, data=df)
# NOTE(review): 'seaborn.plt' was removed from modern seaborn releases;
# use matplotlib.pyplot directly if upgrading -- confirm pinned version.
seaborn.plt.show()

df = DataFrame(arr2)
pr = seaborn.boxplot(x=0, y=1, data=df)
import os

import numpy as np

from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results

if __name__ == '__main__':
    # Experiment: test finite-feature kernel exponential-family fits of a
    # 1-d standard Gaussian with the quadratic-time Stein test, sweeping
    # the number of features m.
    D = 1
    N_test = 500
    N_fit = 50000
    ms_fit = np.array(
        [1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])
    sigma = 1
    lmbda = 0.01

    # 'est' is rebound inside the loop below; the lambda closes over it
    # late, so each iteration's estimator is the one actually queried.
    grad = lambda x: est.grad(np.array([x]))[0]
    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200

    # Results file named after this script. Fix: 'import os' was missing
    # even though os.path is used here.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"

    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            # NOTE(review): KernelExpFiniteGaussian is not imported in
            # this chunk -- confirm its import exists elsewhere in the
            # original file.
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)
            X = np.random.randn(N_fit, D)
            est.fit(X)
            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])