def test_two_dimensional_tests_agrees(self):
    """The dedicated two-dim statistic must agree with the generic multi-dim one."""
    np.random.seed(43)
    test = GaussianQuadraticTest(self.grad_log_normal)
    X = np.random.randn(10, 2)
    # NOTE: `get_statisitc_two_dim` is the (misspelled) public API name.
    u_two_dim, _ = test.get_statisitc_two_dim(10, X, 1)
    u_multi_dim, _ = test.get_statistic_multiple_dim(X, 1)
    np.testing.assert_almost_equal(u_two_dim, u_multi_dim)
 def test_k_multiple_equals_k_no_dim(self):
     N = 10
     X = np.random.randn(N, 1)
     me = GaussianQuadraticTest(self.grad_log_normal)
     K1 = me.k_multiple_dim(X)
     K2 = me.k_multiple(X[:, 0])
     np.testing.assert_almost_equal(K1, K2)
 def test_regression_2(self):
     np.random.seed(42)
     data = np.random.randn(100) * 2.0
     me = GaussianQuadraticTest(self.grad_log_normal)
     U_stat, _ = me.get_statistic_multiple(data)
     pval = me.compute_pvalue(U_stat)
     assert pval == 0.0
 def test_two_dimensional_tests_alt(self):
     np.random.seed(43)
     me = GaussianQuadraticTest(self.grad_log_normal)
     samples = np.random.randn(100, 2) + 1
     U, _ = me.get_statisitc_two_dim(100, samples, 1)
     p = me.compute_pvalue(U)
     assert p == 0
 def test_gk_multiple_dim(self):
     N = 10
     X = np.random.randn(N, 1)
     me = GaussianQuadraticTest(self.grad_log_normal)
     K = me.k_multiple_dim(X)
     gk_alt = me.gk_multiple_dim(X, K, 0)
     gk_orig = me.gk_multiple(X[:, 0])
     np.testing.assert_almost_equal(gk_alt, gk_orig)
    def test_get_statistic_multiple_equals_get_statistic(self):
        """Vectorised statistic must match the single-sample implementation."""
        n = 10
        data = np.random.randn(n)
        test = GaussianQuadraticTest(self.grad_log_normal)

        U_multi, stat_multi = test.get_statistic_multiple(data)
        # `get_statisitc` is the (misspelled) public API name.
        U_single, stat_single = test.get_statisitc(n, data)

        assert_allclose(stat_single, stat_multi)
        assert_allclose(U_multi, U_single)
    def test_k_multiple_equals_k_no_grad_multiple_given(self):
        """Kernel matrix from k_multiple matches elementwise k() evaluations."""
        n = 10
        data = np.random.randn(n)
        test = GaussianQuadraticTest(self.grad_log_normal)
        kernel = test.k_multiple(data)

        for row in range(n):
            for col in range(n):
                expected = test.k(data[row], data[col])
                assert_almost_equal(kernel[row, col], expected)
    def test_gk_multiple_equals_gk(self):
        """Gradient-kernel matrix from gk_multiple matches elementwise gk() calls."""
        n = 10
        data = np.random.randn(n)
        test = GaussianQuadraticTest(self.grad_log_normal)
        grad_kernel = test.gk_multiple(data)

        for row in range(n):
            for col in range(n):
                expected = test.gk(data[row], data[col])
                assert_almost_equal(grad_kernel[row, col], expected)
    def test_corr(self):
        """Correlated Gaussian samples are not rejected under their true model."""
        np.random.seed(43)
        cov = np.array([[1, 0.5], [0.5, 1]])

        def correlated_score(x):
            # Score function of N(0, cov): -(P^T + P) x / 2 with P = cov^-1.
            prec = np.linalg.inv(cov)
            return -np.dot(prec.T + prec, x) / 2.0

        test = GaussianQuadraticTest(correlated_score)
        multi = QuadraticMultiple(test)
        data = np.random.multivariate_normal([0, 0], cov, 200)

        reject, p_val = multi.is_from_null(0.05, data, 0.1)
        # Regression value pinned by the fixed seed above.
        np.testing.assert_almost_equal([0.465, 0.465], p_val)
    def test_k_multiple_equals_k_grad_multiple_given(self):
        """k_multiple must agree with pairwise k() when a custom
        grad_log_prob_multiple callback is supplied."""
        # NOTE(review): `fun` takes (self, X) although it is passed as a plain
        # callable; presumably GaussianQuadraticTest calls the attribute
        # unbound so the data array lands in the first positional slot —
        # confirm against the class implementation before renaming.
        def fun(self, X):
            return -X

        N = 10
        X = np.random.randn(N)
        me = GaussianQuadraticTest(self.grad_log_normal,
                                   grad_log_prob_multiple=fun)
        K = me.k_multiple(X)

        # Compare the vectorised kernel matrix entry-by-entry.
        for i in range(N):
            for j in range(N):
                k = me.k(X[i], X[j])
                assert_almost_equal(K[i, j], k)
# Example #11
# 0
    def run_simulation(sample_size, bootstrap_size=600, average_over=400):
        """Compare rejection rates of three goodness-of-fit tests across dimensions.

        For each dimension d, draws `bootstrap_size` null samples of the
        Baringhaus statistic from N(0, I), then over `average_over` repetitions
        tests data whose first coordinate carries a uniform shift (the
        alternative), printing the fraction of p-values below 0.1 for the
        Baringhaus, Stein (Gaussian kernel) and IMQ tests.

        Args:
            sample_size: number of points per test sample.
            bootstrap_size: number of null draws for the Baringhaus reference.
            average_over: number of Monte Carlo repetitions per dimension.
        """

        for d in [2, 5, 10, 15, 20, 25]:
            # Empirical null distribution of the Baringhaus statistic.
            samples = []
            for i in range(bootstrap_size):
                samples.append(baringhaus_stat(np.random.randn(sample_size,
                                                               d)))
            samples = np.array(samples)
            pvals_brainghaus = []
            pvals_stein = []
            pvals_imq = []
            for i in range(average_over):
                # Alternative: standard normal with a U(0,1) shift on coord 0.
                X = np.random.randn(sample_size, d)
                X[:, 0] += np.random.rand(sample_size)
                # baringhaus p value (fraction of null draws exceeding T)
                T = baringhaus_stat(X)
                pval = float(len(samples[samples > T])) / bootstrap_size
                pvals_brainghaus.append(pval)
                # gaussian p value
                me = GaussianQuadraticTest(grad_log_normal)
                qm = QuadraticMultiple2(me)
                p = qm.is_from_null(0.1, np.copy(X), 0.5)
                pvals_stein.append(p)
                # IMQ p value
                me2 = MultiquadricQuadraticTest(grad_log_normal, beta=-0.5)
                qm2 = QuadraticMultiple2(me2)
                p2 = qm2.is_from_null(0.1, np.copy(X), 0.5)
                pvals_imq.append(p2)

            # Report empirical power (rejection rate at level 0.1) per test.
            print('d :', d)
            pvals_brainghaus = np.array(pvals_brainghaus)
            print(
                'baringhaus :',
                float(len(pvals_brainghaus[pvals_brainghaus < 0.1])) /
                average_over)

            pvals_stein = np.array(pvals_stein)
            print('Stein  :',
                  float(len(pvals_stein[pvals_stein < 0.1])) / average_over)
            pvals_imq = np.array(pvals_imq)
            print('IMQ  :',
                  float(len(pvals_imq[pvals_imq < 0.1])) / average_over)
    # NOTE(review): fragment of a larger function — `m`, `X_train`, `y_train`,
    # `X_test`, `y_test`, `compute_gp_regression_gradients` and
    # `bootstrap_null` are defined above this view.
    m.optimize()
    
    res = 100  # NOTE(review): apparently unused here — possibly consumed later
    pred_mean, pred_std = m.predict(X_test)
    # Plot the GP fit: mean, +/- 2 std bands, training and test points.
    plt.plot(X_test, pred_mean, 'b-')
    plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
    plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
    plt.plot(X_train, y_train, 'b.', markersize=3)
    plt.plot(X_test, y_test, 'r.', markersize=5)
    plt.grid(True)
    plt.xlabel(r"$X$")
    plt.ylabel(r"$y$")
    plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
    plt.show()
    
    # Goodness-of-fit test on the GP predictive using externally supplied
    # gradients (hence the `None` score function).
    s = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
    
    # Bootstrap the null distribution of the statistic and plot it.
    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    
    # NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 — confirm
    # the pinned seaborn version, or migrate to histplot.
    sns.distplot(null_samples, kde=False, norm_hist=True)
    plt.plot([stat, stat], [0, .012], 'black')
    plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
    plt.xlabel(r"$V_n$")
    plt.ylabel(r"Frequency")
    plt.savefig("gp_regression_bootstrap_hist.eps", bbox_inches='tight')
    
    plt.show()
# Example #13
# 0
# Test each time-slice of an MCMC chain against the target via the quadratic
# Stein test. `samples`, `times_we_look_at`, `manual_grad`, `grad_log_prior`
# and `X` are defined above this view.
arr = []

# NOTE(review): `grad_log_pob` is referenced here but only defined inside the
# loop below — this raises NameError unless an earlier definition exists
# outside this view; confirm.
me = GaussianSteinTest(grad_log_pob, 1)

for time in times_we_look_at:
    # All chains observed at this iteration.
    chain_at_time = samples[:, time]

    # print(time)
    # pval = me.compute_pvalue(chain_at_time)
    # arr.append(pval)
    def grad_log_pob(t):
        # Score of the posterior: data-term gradient plus prior gradient.
        a = np.sum(manual_grad(t[0], t[1], X), axis=0) + grad_log_prior(t)
        return a

    # NOTE(review): P_CHANGE appears unused in the visible code.
    P_CHANGE = 0.1

    me = GaussianQuadraticTest(grad_log_pob)
    qm = QuadraticMultiple(me)

    reject, p = qm.is_from_null(0.05, chain_at_time, 0.1)

    print(reject)

# import matplotlib.pyplot as plt
#
# print(arr)
#
# plt.plot(arr)
#
# plt.show()
# Example #14
# 0
# Power study: thinned near-Student-t chains tested against the Gaussian
# model for several degrees of freedom, plus a near-Gaussian control run.
# `N`, `almost_t_student` and `grad_log_normal` are defined above this view.
dfs = range(1, 4, 2)  # degrees of freedom tested: 1 and 3
mc_reps = 100
res = np.empty((0, 2))  # accumulated rows of (df, p-value)

# Block length ~ N / log N and the matching change probability for the
# wild-bootstrap process p-values.
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)

for df in dfs:

    for mc in range(mc_reps):
        print(mc)
        # Over-sample the chain, then thin by 10 to reduce autocorrelation.
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)

        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))

# Control run with df=100 (close to Gaussian), tagged with infinity.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # Fix: np.Inf was removed in NumPy 2.0; np.inf is the canonical alias
    # and is identical on all earlier versions.
    res = np.vstack((res, np.array([np.inf, pval])))

np.save('results.npy', res)
# Sensitivity study: Metropolis-Hastings chains targeting logg(c) for several
# c values, each tested with two quadratic-test variants. `logg`,
# `metropolis_hastings` and `grad_log_dens` are defined above this view.
arr = np.empty((0, 2))   # rows of (c, min componentwise p-value)

arr2 = np.empty((0, 2))  # rows of (c, combined p-value)
for c in [1.0, 1.3, 2.0, 3.0]:
    print('c', c)

    log_normal = logg(c)

    for i in range(23):
        print(i)
        # Thinned MH chain of 500 points started from a standard normal draw.
        x = metropolis_hastings(log_normal,
                                chain_size=500,
                                thinning=15,
                                x_prev=np.random.randn(2))

        me = GaussianQuadraticTest(grad_log_dens)
        qm = QuadraticMultiple(me)
        qm2 = QuadraticMultiple2(me)

        accept_null, p_val = qm.is_from_null(0.05, x, 0.1)
        p_val2 = qm2.is_from_null(0.05, x, 0.1)
        print(p_val2)
        arr = np.vstack((arr, np.array([c, min(p_val)])))
        arr2 = np.vstack((arr2, np.array([c, p_val2])))

# Boxplot of p-values grouped by c.
df = DataFrame(arr)
pr = seaborn.boxplot(x=0, y=1, data=df)
# Fix: the `seaborn.plt` matplotlib alias was removed in seaborn 0.9;
# call matplotlib.pyplot directly instead.
import matplotlib.pyplot as plt
plt.show()

df = DataFrame(arr2)
pr = seaborn.boxplot(x=0, y=1, data=df)
import numpy as np
from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results

if __name__ == '__main__':
    # NOTE(review): this block continues past the visible end of the file;
    # comments below cover only the visible portion.
    # Experiment setup: 1-d data, kernel exponential family fitted with an
    # increasing number of random features m.
    D = 1
    N_test = 500
    N_fit = 50000
    # Feature counts to sweep over.
    ms_fit = np.array(
        [1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])

    sigma = 1
    lmbda = 0.01

    # NOTE(review): `est` is only bound inside the loop below; the lambda
    # resolves it lazily at call time — confirm the statistic is computed
    # only after est.fit() has run.
    grad = lambda x: est.grad(np.array([x]))[0]
    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200

    # Results file named after this script.
    # NOTE(review): uses `os`, which is not in the visible import block —
    # confirm it is imported elsewhere.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"

    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            # Fit the finite-feature estimator on fresh standard-normal data.
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)

            X = np.random.randn(N_fit, D)
            est.fit(X)

            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])