def test_two_dimensional_tests_agrees(self):
    """The fixed-dim and the generic multi-dim statistics coincide on 2-D data."""
    np.random.seed(43)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    draws = np.random.randn(10, 2)
    # NB: 'get_statisitc_two_dim' is the library's (historically misspelled) name.
    stat_two_dim, _ = tester.get_statisitc_two_dim(10, draws, 1)
    stat_multi, _ = tester.get_statistic_multiple_dim(draws, 1)
    np.testing.assert_almost_equal(stat_two_dim, stat_multi)
def test_two_dimensional_tests_agrees(self):
    """Both two-dimensional statistic implementations return the same value."""
    np.random.seed(43)
    quad_test = GaussianQuadraticTest(self.grad_log_normal)
    points = np.random.randn(10, 2)
    # Library method name carries a historical typo ('statisitc').
    first, _ = quad_test.get_statisitc_two_dim(10, points, 1)
    second, _ = quad_test.get_statistic_multiple_dim(points, 1)
    np.testing.assert_almost_equal(first, second)
def test_regression_2(self):
    """Regression pin: N(0, 4) data tested against N(0, 1) yields p-value 0."""
    np.random.seed(42)
    observations = 2.0 * np.random.randn(100)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    u_stat, _ = tester.get_statistic_multiple(observations)
    assert tester.compute_pvalue(u_stat) == 0.0
def test_k_multiple_equals_k_no_dim(self):
    """k_multiple_dim on an (N, 1) array must match k_multiple on the flat vector."""
    n = 10
    points = np.random.randn(n, 1)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    gram_dim = tester.k_multiple_dim(points)
    gram_flat = tester.k_multiple(points[:, 0])
    np.testing.assert_almost_equal(gram_dim, gram_flat)
def test_two_dimensional_tests_alt(self):
    """Mean-shifted 2-D Gaussian data must be rejected with p-value 0."""
    np.random.seed(43)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    shifted = 1 + np.random.randn(100, 2)
    stat, _ = tester.get_statisitc_two_dim(100, shifted, 1)
    assert tester.compute_pvalue(stat) == 0
def test_regression_2(self):
    """p-value must be exactly 0 for N(0, 4) samples against the N(0, 1) score."""
    np.random.seed(42)
    sample = np.random.randn(100) * 2.0
    quad_test = GaussianQuadraticTest(self.grad_log_normal)
    statistic, _ = quad_test.get_statistic_multiple(sample)
    p_value = quad_test.compute_pvalue(statistic)
    assert p_value == 0.0
def test_k_multiple_equals_k_no_dim(self):
    """The multi-dim kernel matrix on a column vector equals the 1-D one."""
    count = 10
    column = np.random.randn(count, 1)
    quad_test = GaussianQuadraticTest(self.grad_log_normal)
    np.testing.assert_almost_equal(quad_test.k_multiple_dim(column),
                                   quad_test.k_multiple(column[:, 0]))
def test_two_dimensional_tests_alt(self):
    """A unit mean shift in both coordinates is detected with p == 0."""
    np.random.seed(43)
    quad_test = GaussianQuadraticTest(self.grad_log_normal)
    off_null = np.random.randn(100, 2) + 1
    statistic, _ = quad_test.get_statisitc_two_dim(100, off_null, 1)
    assert quad_test.compute_pvalue(statistic) == 0
def test_gk_multiple_dim(self):
    """gk_multiple_dim along axis 0 must agree with the 1-D gk_multiple."""
    n = 10
    data = np.random.randn(n, 1)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    gram = tester.k_multiple_dim(data)
    via_dim = tester.gk_multiple_dim(data, gram, 0)
    via_flat = tester.gk_multiple(data[:, 0])
    np.testing.assert_almost_equal(via_dim, via_flat)
def test_g1k_multiple_dim(self):
    """g1k_multiple_dim along axis 0 must agree with the 1-D g1k_multiple."""
    n = 10
    data = np.random.randn(n, 1)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    gram = tester.k_multiple_dim(data)
    via_dim = tester.g1k_multiple_dim(data, gram, 0)
    via_flat = tester.g1k_multiple(data[:, 0])
    np.testing.assert_almost_equal(via_dim, via_flat)
def test_get_statistic_multiple_equals_get_statistic(self):
    """The vectorised statistic must reproduce the pairwise one on 1-D data."""
    n = 10
    sample = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    u_multiple, stat_multiple = tester.get_statistic_multiple(sample)
    # 'get_statisitc' is the library's (misspelled) pairwise implementation.
    u_pairwise, stat_pairwise = tester.get_statisitc(n, sample)
    assert_allclose(stat_pairwise, stat_multiple)
    assert_allclose(u_multiple, u_pairwise)
def test_gk_multiple_equals_gk(self):
    """Entry (i, j) of gk_multiple must equal the scalar gk(X[i], X[j])."""
    n = 10
    sample = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    gram = tester.gk_multiple(sample)
    for row in range(n):
        for col in range(n):
            assert_almost_equal(gram[row, col],
                                tester.gk(sample[row], sample[col]))
def test_k_multiple_equals_k_no_grad_multiple_given(self):
    """Entry (i, j) of k_multiple must equal the scalar kernel k(X[i], X[j])."""
    n = 10
    sample = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal)
    gram = tester.k_multiple(sample)
    for row in range(n):
        for col in range(n):
            assert_almost_equal(gram[row, col],
                                tester.k(sample[row], sample[col]))
def compare_against_mmd_test():
    """Compare the Stein goodness-of-fit p-value against a quadratic-time MMD
    two-sample p-value on the solar GP-regression dataset.

    Returns:
        (p_value_ours, p_value_mmd): p-values of the Stein test and of the
        MMD test, both computed against 10000 simulated null statistics.
    """
    # Fit a GP regression model to the solar data.
    data = loadmat("../data/02-solar.mat")
    X = data["X"]
    y = data["y"]
    X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)
    kernel = RBF(input_dim=1, variance=0.608, lengthscale=0.207)
    m = GPRegression(X_train, y_train, kernel, noise_var=0.283)
    m.optimize()
    pred_mean, pred_std = m.predict(X_test)

    # Stein test: custom gradients come from the GP predictive distribution.
    s = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    # null_samples = sample_null_simulated_gp(s, pred_mean, pred_std, num_test_samples)
    p_value_ours = 1.0 - np.mean(null_samples <= stat)

    # MMD test: compare (X_test, y_test) against a replicate (X_test, y_rep)
    # drawn from the GP predictive distribution.
    y_rep = np.random.randn(len(X_test)) * pred_std.flatten() + pred_mean.flatten()
    y_rep = np.atleast_2d(y_rep).T
    A = np.hstack((X_test, y_test))
    B = np.hstack((X_test, y_rep))
    # Shogun expects features in column-major (dim x n) layout, hence the .T.
    feats_p = RealFeatures(A.T)
    feats_q = RealFeatures(B.T)
    width = 1
    kernel = GaussianKernel(10, width)
    mmd = QuadraticTimeMMD()
    mmd.set_kernel(kernel)
    mmd.set_p(feats_p)
    mmd.set_q(feats_q)
    mmd_stat = mmd.compute_statistic()

    # sample from null
    num_null_samples = 10000
    mmd_null_samples = np.zeros(num_null_samples)
    for i in range(num_null_samples):
        # fix y_rep from above, and change the other one (that would replace y_test)
        y_rep2 = np.random.randn(len(X_test)) * pred_std.flatten() + pred_mean.flatten()
        y_rep2 = np.atleast_2d(y_rep2).T
        A = np.hstack((X_test, y_rep2))
        feats_p = RealFeatures(A.T)
        width = 1
        kernel = GaussianKernel(10, width)
        mmd = QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feats_p)
        mmd.set_q(feats_q)
        mmd_null_samples[i] = mmd.compute_statistic()
    p_value_mmd = 1.0 - np.mean(mmd_null_samples <= mmd_stat)
    return p_value_ours, p_value_mmd
def test_k_multiple_equals_k_grad_multiple_given(self):
    """With a user-supplied vectorised gradient, k_multiple still matches scalar k."""
    # Signature kept exactly as the library consumes it via grad_log_prob_multiple.
    def fun(self, X):
        return -X
    n = 10
    sample = np.random.randn(n)
    tester = GaussianQuadraticTest(self.grad_log_normal, grad_log_prob_multiple=fun)
    gram = tester.k_multiple(sample)
    for row in range(n):
        for col in range(n):
            assert_almost_equal(gram[row, col],
                                tester.k(sample[row], sample[col]))
def test_corr(self):
    """Regression pin: correlated 2-D Gaussian tested against its own score."""
    np.random.seed(43)
    cov = np.array([[1, 0.5], [0.5, 1]])

    def correlated_score(x):
        # Score of N(0, cov): -0.5 * (Sigma^-T + Sigma^-1) x.
        prec = np.linalg.inv(cov)
        return -np.dot(prec.T + prec, x) / 2.0

    tester = GaussianQuadraticTest(correlated_score)
    multi = QuadraticMultiple(tester)
    draws = np.random.multivariate_normal([0, 0], cov, 200)
    reject, p_val = multi.is_from_null(0.05, draws, 0.1)
    np.testing.assert_almost_equal([0.465, 0.465], p_val)
def run_simulation(sample_size, bootstrap_size=600, average_over=400):
    """Print empirical rejection rates (level 0.1) of the Baringhaus,
    Gaussian-Stein and IMQ-Stein tests across several dimensions, against
    data whose first coordinate carries a uniform shift."""
    for d in [2, 5, 10, 15, 20, 25]:
        # Bootstrap the Baringhaus null distribution at this dimension.
        null_stats = np.array([baringhaus_stat(np.random.randn(sample_size, d))
                               for _ in range(bootstrap_size)])
        baringhaus_pvals = []
        stein_pvals = []
        imq_pvals = []
        for _ in range(average_over):
            X = np.random.randn(sample_size, d)
            X[:, 0] += np.random.rand(sample_size)
            # Baringhaus p-value from the bootstrapped null.
            T = baringhaus_stat(X)
            baringhaus_pvals.append(float(len(null_stats[null_stats > T])) / bootstrap_size)
            # Gaussian-kernel Stein test p-value.
            gauss_test = GaussianQuadraticTest(grad_log_normal)
            stein_pvals.append(QuadraticMultiple2(gauss_test).is_from_null(0.1, np.copy(X), 0.5))
            # Inverse-multiquadric-kernel Stein test p-value.
            imq_test = MultiquadricQuadraticTest(grad_log_normal, beta=-0.5)
            imq_pvals.append(QuadraticMultiple2(imq_test).is_from_null(0.1, np.copy(X), 0.5))
        print('d :', d)
        baringhaus_pvals = np.array(baringhaus_pvals)
        print('baringhaus :', float(len(baringhaus_pvals[baringhaus_pvals < 0.1])) / average_over)
        stein_pvals = np.array(stein_pvals)
        print('Stein :', float(len(stein_pvals[stein_pvals < 0.1])) / average_over)
        imq_pvals = np.array(imq_pvals)
        print('IMQ :', float(len(imq_pvals[imq_pvals < 0.1])) / average_over)
# Fit plot: GP predictive mean with +/- 2 standard-deviation bands over the
# test inputs, training points in blue, held-out test points in red.
m.optimize()
res = 100  # NOTE(review): unused in this chunk — presumably a leftover grid resolution; confirm
pred_mean, pred_std = m.predict(X_test)
plt.plot(X_test, pred_mean, 'b-')
plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
plt.plot(X_train, y_train, 'b.', markersize=3)
plt.plot(X_test, y_test, 'r.', markersize=5)
plt.grid(True)
plt.xlabel(r"$X$")
plt.ylabel(r"$y$")
plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
plt.show()

# Stein statistic on the test residuals, with gradients taken from the GP
# predictive distribution, and its bootstrapped null distribution.
s = GaussianQuadraticTest(None)
gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
num_test_samples = 10000
null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)

# Histogram of bootstrap null samples with the observed statistic marked.
sns.distplot(null_samples, kde=False, norm_hist=True)
plt.plot([stat, stat], [0, .012], 'black')
plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
plt.xlabel(r"$V_n$")
plt.ylabel(r"Frequency")
plt.savefig("gp_regression_bootstrap_hist.eps", bbox_inches='tight')
plt.show()
# Run the quadratic-time Stein test on an MCMC chain at selected time points.
arr = []
# NOTE(review): grad_log_pob is referenced here before the def below — unless
# an earlier definition exists above this chunk, this line raises NameError
# at runtime; confirm against the full file.
me = GaussianSteinTest(grad_log_pob, 1)
for time in times_we_look_at:
    # All chains' states at this iteration (rows = chains).
    chain_at_time = samples[:, time]
    # print(time)
    # pval = me.compute_pvalue(chain_at_time)
    # arr.append(pval)

    def grad_log_pob(t):
        # Score of the posterior: summed manual likelihood gradients plus prior gradient.
        a = np.sum(manual_grad(t[0], t[1], X), axis=0) + grad_log_prior(t)
        return a

    P_CHANGE = 0.1
    me = GaussianQuadraticTest(grad_log_pob)
    qm = QuadraticMultiple(me)
    reject, p = qm.is_from_null(0.05, chain_at_time, 0.1)
    print(reject)

# import matplotlib.pyplot as plt
# print(arr)
# plt.plot(arr)
# plt.show()
# Power simulation: p-values of the quadratic-time Stein test on thinned
# almost-t-Student chains for several degrees of freedom; results saved
# as rows of (df, pval) in results.npy.
dfs = range(1, 4, 2)
mc_reps = 100
res = np.empty((0, 2))
# Expected block length for the wild-bootstrap process; N comes from above.
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)
for df in dfs:
    for mc in range(mc_reps):
        print(mc)
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]  # thin the chain by a factor of 10
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)
        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))
# Reference run with df=100 (close to normal), tagged as df = inf.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # Fix: np.Inf was removed in NumPy 2.0; np.inf is the same float value.
    res = np.vstack((res, np.array([np.inf, pval])))
np.save('results.npy', res)
# Collect Stein-test p-values for Metropolis-Hastings chains targeting a
# family of densities parameterised by c, then box-plot p-values vs c.
arr = np.empty((0, 2))   # rows: (c, min p-value from QuadraticMultiple)
arr2 = np.empty((0, 2))  # rows: (c, p-value from QuadraticMultiple2)
for c in [1.0, 1.3, 2.0, 3.0]:
    print('c', c)
    log_normal = logg(c)
    for i in range(23):
        print(i)
        x = metropolis_hastings(log_normal, chain_size=500, thinning=15, x_prev=np.random.randn(2))
        me = GaussianQuadraticTest(grad_log_dens)
        qm = QuadraticMultiple(me)
        qm2 = QuadraticMultiple2(me)
        accept_null, p_val = qm.is_from_null(0.05, x, 0.1)
        p_val2 = qm2.is_from_null(0.05, x, 0.1)
        print(p_val2)
        arr = np.vstack((arr, np.array([c, min(p_val)])))
        arr2 = np.vstack((arr2, np.array([c, p_val2])))
df = DataFrame(arr)
pr = seaborn.boxplot(x=0, y=1, data=df)
# NOTE(review): seaborn.plt was an old alias for matplotlib.pyplot and no
# longer exists in modern seaborn — confirm the pinned seaborn version.
seaborn.plt.show()
df = DataFrame(arr2)
pr = seaborn.boxplot(x=0, y=1, data=df)
import os

import numpy as np

from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results

# NOTE(review): KernelExpFiniteGaussian is used below but never imported in
# this chunk — confirm it is imported elsewhere or add the missing import.

if __name__ == '__main__':
    # Experiment dimensions and sample sizes.
    D = 1
    N_test = 500
    N_fit = 50000
    # Numbers of basis functions to sweep for the finite estimator.
    ms_fit = np.array([1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])
    sigma = 1
    lmbda = 0.01
    # Late-binding closure: 'est' is resolved at call time, so this picks up
    # the estimator assigned inside the loop below.
    grad = lambda x: est.grad(np.array([x]))[0]
    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200
    # Fix: 'os' was used here without being imported; import added above.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"
    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)
            X = np.random.randn(N_fit, D)
            est.fit(X)
            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])
# Sample the quadratic-time Stein statistic under the null (standard normal
# data, standard normal score) at several sample sizes.
from pandas import DataFrame
import seaborn
from stat_test.quadratic_time import GaussianQuadraticTest

__author__ = 'kcx'
import numpy as np


def grad_log_normal(x):
    """Score function of the standard normal: d/dx log N(x; 0, 1) = -x."""
    return -x


np.random.seed(42)
me = GaussianQuadraticTest(grad_log_normal)
res = np.empty((0, 2))  # rows: (sample_size, statistic)
for i in range(50):
    data = np.random.randn(75)
    _, s1 = me.get_statisitc(len(data), data)
    res = np.vstack((res, np.array([75, s1])))
for i in range(50):
    data = np.random.randn(100)
    _, s1 = me.get_statisitc(len(data), data)
    res = np.vstack((res, np.array([100, s1])))
for i in range(50):
    data = np.random.randn(150)
    _, s1 = me.get_statisitc(len(data), data)
    # NOTE(review): unlike the loops above, this loop never appends to res —
    # confirm against the full file whether the vstack line was lost here.
# Sample the quadratic-time Stein statistic under the null (standard normal
# data, standard normal score) at sample sizes 75 and 100.
from pandas import DataFrame
import seaborn
from stat_test.quadratic_time import GaussianQuadraticTest

__author__ = 'kcx'
import numpy as np


def grad_log_normal(x):
    """Score function of the standard normal: d/dx log N(x; 0, 1) = -x."""
    return -x


np.random.seed(42)
me = GaussianQuadraticTest(grad_log_normal)
res = np.empty((0, 2))  # rows: (sample_size, statistic)
for i in range(50):
    data = np.random.randn(75)
    _, s1 = me.get_statisitc(len(data), data)
    res = np.vstack((res, np.array([75, s1])))
for i in range(50):
    data = np.random.randn(100)
    _, s1 = me.get_statisitc(len(data), data)
    res = np.vstack((res, np.array([100, s1])))
# estimate size of thinning
def get_thinning(X, nlags=50):
    """Return (lag, acf) where lag is the 1-based index at which the
    autocorrelation of X is closest to 0.95."""
    correlations = acf(X, nlags=nlags, fft=True)
    best_lag = np.argmin(np.abs(correlations - 0.95)) + 1
    return best_lag, correlations


# X = gen(TEST_CHAIN_SIZE, np.Inf)
# thinning, autocorr = get_thinning(X)
# print('thinning for AR normal simulation ', thinning, autocorr[thinning])
thinning = 1
tester = GaussianQuadraticTest(grad_log_normal)


def get_pval(X, tester, p_change):
    """p-value of the wild-bootstrap Stein test for the process X."""
    statistic, _ = tester.get_statistic_multiple(X)
    return tester.compute_pvalues_for_processes(statistic, p_change)


def get_pair(sample_size, df, thinning, tester, p_change):
    """Generate one thinned chain and return [df, its test p-value]."""
    chain = gen(sample_size, df, thinning)
    return [df, get_pval(chain, tester, p_change)]


P_CHANGE = 0.1
results = []
# Monte-Carlo study: wild-bootstrap Stein-test p-values on thinned
# almost-t-Student processes, swept over degrees of freedom.
dfs = range(1, 4, 2)
mc_reps = 100
res = np.empty((0, 2))
# Expected block length for the bootstrap process; N is defined earlier.
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)
for df in dfs:
    for mc in range(mc_reps):
        print(mc)
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]  # keep every 10th draw
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)
        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))
# Near-normal reference run (df=100), recorded under the label inf.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # Fix: np.Inf was removed in NumPy 2.0; np.inf is the identical value.
    res = np.vstack((res, np.array([np.inf, pval])))
np.save('results.npy', res)
import os

import numpy as np

from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results

# NOTE(review): KernelExpFiniteGaussian is used below but never imported in
# this chunk — confirm it is imported elsewhere or add the missing import.

if __name__ == '__main__':
    # Experiment dimensions and sample sizes.
    D = 1
    N_test = 500
    N_fit = 50000
    # Numbers of basis functions to sweep for the finite estimator.
    ms_fit = np.array(
        [1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])
    sigma = 1
    lmbda = 0.01
    # Late-binding closure: 'est' is resolved at call time, so this picks up
    # the estimator assigned inside the loop below.
    grad = lambda x: est.grad(np.array([x]))[0]
    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200
    # Fix: 'os' was used here without being imported; import added above.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"
    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)
            X = np.random.randn(N_fit, D)
            est.fit(X)
            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])