def test_two_dimensional_tests_agrees(self):
     """The two-dimensional and the multi-dimensional statistic must agree.

     Compares the first value returned by ``get_statisitc_two_dim`` with the
     first value returned by ``get_statistic_multiple_dim`` on the same
     seeded 10x2 standard-normal sample.
     """
     np.random.seed(43)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     n_samples = 10
     points = np.random.randn(n_samples, 2)
     first_two_dim, _ = tester.get_statisitc_two_dim(n_samples, points, 1)
     first_multi_dim, _ = tester.get_statistic_multiple_dim(points, 1)
     np.testing.assert_almost_equal(first_two_dim, first_multi_dim)
 def test_two_dimensional_tests_agrees(self):
     """The two-dimensional and the multi-dimensional statistic must agree.

     Compares the first value returned by ``get_statisitc_two_dim`` with the
     first value returned by ``get_statistic_multiple_dim`` on the same
     seeded 10x2 standard-normal sample.
     """
     np.random.seed(43)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     n_samples = 10
     points = np.random.randn(n_samples, 2)
     first_two_dim, _ = tester.get_statisitc_two_dim(n_samples, points, 1)
     first_multi_dim, _ = tester.get_statistic_multiple_dim(points, 1)
     np.testing.assert_almost_equal(first_two_dim, first_multi_dim)
 def test_regression_2(self):
     """Regression check: standard-normal draws scaled by 2.0 give p-value 0.0."""
     np.random.seed(42)
     scaled_draws = 2.0 * np.random.randn(100)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     u_matrix, _ = tester.get_statistic_multiple(scaled_draws)
     # The p-value is compared exactly, not approximately.
     assert tester.compute_pvalue(u_matrix) == 0.0
 def test_k_multiple_equals_k_no_dim(self):
     """``k_multiple_dim`` on an (N, 1) sample must match ``k_multiple`` on its only column."""
     column_sample = np.random.randn(10, 1)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     kernel_from_matrix = tester.k_multiple_dim(column_sample)
     kernel_from_vector = tester.k_multiple(column_sample[:, 0])
     np.testing.assert_almost_equal(kernel_from_matrix, kernel_from_vector)
 def test_two_dimensional_tests_alt(self):
     """A standard-normal sample shifted by +1 must give a p-value of exactly 0."""
     np.random.seed(43)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     n_samples = 100
     shifted = np.random.randn(n_samples, 2) + 1
     first_returned, _ = tester.get_statisitc_two_dim(n_samples, shifted, 1)
     assert tester.compute_pvalue(first_returned) == 0
 def test_regression_2(self):
     """Regression check: standard-normal draws scaled by 2.0 give p-value 0.0."""
     np.random.seed(42)
     scaled_draws = 2.0 * np.random.randn(100)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     u_matrix, _ = tester.get_statistic_multiple(scaled_draws)
     # The p-value is compared exactly, not approximately.
     assert tester.compute_pvalue(u_matrix) == 0.0
 def test_k_multiple_equals_k_no_dim(self):
     """``k_multiple_dim`` on an (N, 1) sample must match ``k_multiple`` on its only column."""
     column_sample = np.random.randn(10, 1)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     kernel_from_matrix = tester.k_multiple_dim(column_sample)
     kernel_from_vector = tester.k_multiple(column_sample[:, 0])
     np.testing.assert_almost_equal(kernel_from_matrix, kernel_from_vector)
 def test_two_dimensional_tests_alt(self):
     """A standard-normal sample shifted by +1 must give a p-value of exactly 0."""
     np.random.seed(43)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     n_samples = 100
     shifted = np.random.randn(n_samples, 2) + 1
     first_returned, _ = tester.get_statisitc_two_dim(n_samples, shifted, 1)
     assert tester.compute_pvalue(first_returned) == 0
 def test_gk_multiple_dim(self):
     """``gk_multiple_dim`` for dimension index 0 must match ``gk_multiple`` on that column."""
     column_sample = np.random.randn(10, 1)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     kernel = tester.k_multiple_dim(column_sample)
     from_matrix = tester.gk_multiple_dim(column_sample, kernel, 0)
     from_vector = tester.gk_multiple(column_sample[:, 0])
     np.testing.assert_almost_equal(from_matrix, from_vector)
 def test_g1k_multiple_dim(self):
     """``g1k_multiple_dim`` for dimension index 0 must match ``g1k_multiple`` on that column."""
     column_sample = np.random.randn(10, 1)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     kernel = tester.k_multiple_dim(column_sample)
     from_matrix = tester.g1k_multiple_dim(column_sample, kernel, 0)
     from_vector = tester.g1k_multiple(column_sample[:, 0])
     np.testing.assert_almost_equal(from_matrix, from_vector)
 def test_get_statistic_multiple_equals_get_statistic(self):
     """``get_statistic_multiple`` and ``get_statisitc`` must return matching pairs."""
     n_points = 10
     sample = np.random.randn(n_points)
     tester = GaussianQuadraticTest(self.grad_log_normal)

     vectorised = tester.get_statistic_multiple(sample)
     reference = tester.get_statisitc(n_points, sample)
     U_matrix_multiple, stat_multiple = vectorised
     U_matrix, stat = reference

     # Argument order (actual, desired) is kept as in the original checks.
     assert_allclose(stat, stat_multiple)
     assert_allclose(U_matrix_multiple, U_matrix)
    def test_get_statistic_multiple_equals_get_statistic(self):
        """``get_statistic_multiple`` and ``get_statisitc`` must return matching pairs."""
        n_points = 10
        sample = np.random.randn(n_points)
        tester = GaussianQuadraticTest(self.grad_log_normal)

        vectorised = tester.get_statistic_multiple(sample)
        reference = tester.get_statisitc(n_points, sample)
        U_matrix_multiple, stat_multiple = vectorised
        U_matrix, stat = reference

        # Argument order (actual, desired) is kept as in the original checks.
        assert_allclose(stat, stat_multiple)
        assert_allclose(U_matrix_multiple, U_matrix)
 def test_gk_multiple_equals_gk(self):
     """Every entry ``[i, j]`` of ``gk_multiple(X)`` must equal ``gk(X[i], X[j])``."""
     sample = np.random.randn(10)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     pairwise = tester.gk_multiple(sample)

     for i, x_i in enumerate(sample):
         for j, x_j in enumerate(sample):
             assert_almost_equal(pairwise[i, j], tester.gk(x_i, x_j))
    def test_gk_multiple_equals_gk(self):
        """Every entry ``[i, j]`` of ``gk_multiple(X)`` must equal ``gk(X[i], X[j])``."""
        sample = np.random.randn(10)
        tester = GaussianQuadraticTest(self.grad_log_normal)
        pairwise = tester.gk_multiple(sample)

        for i, x_i in enumerate(sample):
            for j, x_j in enumerate(sample):
                assert_almost_equal(pairwise[i, j], tester.gk(x_i, x_j))
 def test_k_multiple_equals_k_no_grad_multiple_given(self):
     """Every entry ``[i, j]`` of ``k_multiple(X)`` must equal ``k(X[i], X[j])``.

     The tester is built without a ``grad_log_prob_multiple`` argument.
     """
     sample = np.random.randn(10)
     tester = GaussianQuadraticTest(self.grad_log_normal)
     pairwise = tester.k_multiple(sample)

     for i, x_i in enumerate(sample):
         for j, x_j in enumerate(sample):
             assert_almost_equal(pairwise[i, j], tester.k(x_i, x_j))
    def test_k_multiple_equals_k_no_grad_multiple_given(self):
        """Every entry ``[i, j]`` of ``k_multiple(X)`` must equal ``k(X[i], X[j])``.

        The tester is built without a ``grad_log_prob_multiple`` argument.
        """
        sample = np.random.randn(10)
        tester = GaussianQuadraticTest(self.grad_log_normal)
        pairwise = tester.k_multiple(sample)

        for i, x_i in enumerate(sample):
            for j, x_j in enumerate(sample):
                assert_almost_equal(pairwise[i, j], tester.k(x_i, x_j))
def compare_against_mmd_test():
    """Compare the quadratic-time test against a quadratic-time MMD test.

    Loads ``../data/02-solar.mat``, fits a GP regression on the training
    split, and computes two p-values on the test split:

    * ours: bootstrap p-value of the statistic returned by
      ``get_statistic_multiple_custom_gradient``;
    * MMD: p-value of the MMD statistic between ``(X_test, y_test)`` and one
      simulated replicate, against a null built from further replicates.

    Returns:
        tuple: ``(p_value_ours, p_value_mmd)``.
    """
    solar = loadmat("../data/02-solar.mat")
    X_train, y_train, X_test, y_test, _, _ = prepare_dataset(solar["X"],
                                                             solar["y"])

    gp_kernel = RBF(input_dim=1, variance=0.608, lengthscale=0.207)
    gp_model = GPRegression(X_train, y_train, gp_kernel, noise_var=0.283)
    gp_model.optimize()
    # NOTE(review): if this is GPy, predict() returns (mean, variance), so
    # ``pred_std`` may actually hold a variance -- confirm against the library.
    pred_mean, pred_std = gp_model.predict(X_test)

    stein_test = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = stein_test.get_statistic_multiple_custom_gradient(
        y_test[:, 0], gradients[:, 0])
    # An alternative null (sample_null_simulated_gp) was disabled in favour of
    # the bootstrap.
    null_samples = bootstrap_null(U_matrix, num_bootstrap=10000)
    p_value_ours = 1.0 - np.mean(null_samples <= stat)

    def draw_replicate():
        # One simulated response column drawn around the predictive mean.
        draws = (np.random.randn(len(X_test)) * pred_std.flatten()
                 + pred_mean.flatten())
        return np.atleast_2d(draws).T

    def mmd_statistic(p_features, q_features):
        # A fresh kernel and MMD object per evaluation, as in the original flow.
        gaussian_kernel = GaussianKernel(10, 1)
        mmd = QuadraticTimeMMD()
        mmd.set_kernel(gaussian_kernel)
        mmd.set_p(p_features)
        mmd.set_q(q_features)
        return mmd.compute_statistic()

    y_rep = draw_replicate()
    observed = np.hstack((X_test, y_test))
    replicated = np.hstack((X_test, y_rep))
    feats_p = RealFeatures(observed.T)
    feats_q = RealFeatures(replicated.T)
    mmd_stat = mmd_statistic(feats_p, feats_q)

    # Null distribution: keep y_rep fixed (feats_q) and replace the observed
    # responses by a fresh replicate each time.
    num_null_samples = 10000
    mmd_null_samples = np.zeros(num_null_samples)
    for i in range(num_null_samples):
        resampled = np.hstack((X_test, draw_replicate()))
        mmd_null_samples[i] = mmd_statistic(RealFeatures(resampled.T), feats_q)

    p_value_mmd = 1.0 - np.mean(mmd_null_samples <= mmd_stat)

    return p_value_ours, p_value_mmd
 def test_k_multiple_equals_k_grad_multiple_given(self):
     """With a custom ``grad_log_prob_multiple``, ``k_multiple`` must match ``k`` entry-wise."""

     def negated_input(self, X):
         return -X

     sample = np.random.randn(10)
     tester = GaussianQuadraticTest(self.grad_log_normal,
                                    grad_log_prob_multiple=negated_input)
     pairwise = tester.k_multiple(sample)

     for i, x_i in enumerate(sample):
         for j, x_j in enumerate(sample):
             assert_almost_equal(pairwise[i, j], tester.k(x_i, x_j))
    def test_k_multiple_equals_k_grad_multiple_given(self):
        """With a custom ``grad_log_prob_multiple``, ``k_multiple`` must match ``k`` entry-wise."""

        def negated_input(self, X):
            return -X

        sample = np.random.randn(10)
        tester = GaussianQuadraticTest(self.grad_log_normal,
                                       grad_log_prob_multiple=negated_input)
        pairwise = tester.k_multiple(sample)

        for i, x_i in enumerate(sample):
            for j, x_j in enumerate(sample):
                assert_almost_equal(pairwise[i, j], tester.k(x_i, x_j))
    def test_corr(self):
        """Regression check of the p-values for a correlated 2-D Gaussian sample.

        Draws 200 seeded points from a zero-mean Gaussian with covariance
        ``sigma`` and expects both returned p-values to be about 0.465.
        """
        np.random.seed(43)
        sigma = np.array([[1, 0.5], [0.5, 1]])
        # The inverse does not depend on x, so it is computed once here.
        sigma_inv = np.linalg.inv(sigma)
        symmetrised = sigma_inv.T + sigma_inv

        def grad_log_correleted(x):
            return -np.dot(symmetrised, x) / 2.0

        tester = GaussianQuadraticTest(grad_log_correleted)
        multi_test = QuadraticMultiple(tester)
        X = np.random.multivariate_normal([0, 0], sigma, 200)

        _, p_val = multi_test.is_from_null(0.05, X, 0.1)
        np.testing.assert_almost_equal([0.465, 0.465], p_val)
Example #21
0
    def run_simulation(sample_size, bootstrap_size=600, average_over=400):
        """Print empirical rejection rates of three tests for several dimensions.

        For each dimension ``d`` in ``[2, 5, 10, 15, 20, 25]`` a null sample of
        ``bootstrap_size`` Baringhaus statistics is simulated from standard
        normal data. Then ``average_over`` perturbed data sets (a uniform
        draw is added to the first coordinate) are tested with the Baringhaus
        statistic, the Gaussian quadratic test and the IMQ quadratic test.
        The fraction of p-values below 0.1 is printed for each test.

        Args:
            sample_size: number of rows in every simulated data set.
            bootstrap_size: number of simulated null statistics.
            average_over: number of perturbed data sets per dimension.
        """

        def rejection_rate(pvals):
            # Share of the repetitions whose p-value falls below 0.1.
            pvals = np.array(pvals)
            return float(len(pvals[pvals < 0.1])) / average_over

        for d in [2, 5, 10, 15, 20, 25]:
            null_stats = np.array([
                baringhaus_stat(np.random.randn(sample_size, d))
                for _ in range(bootstrap_size)
            ])

            baringhaus_pvals, stein_pvals, imq_pvals = [], [], []
            for _ in range(average_over):
                X = np.random.randn(sample_size, d)
                X[:, 0] += np.random.rand(sample_size)

                # baringhaus p value
                T = baringhaus_stat(X)
                exceeding = null_stats[null_stats > T]
                baringhaus_pvals.append(float(len(exceeding)) / bootstrap_size)

                # gaussian p value
                gaussian_test = QuadraticMultiple2(
                    GaussianQuadraticTest(grad_log_normal))
                stein_pvals.append(
                    gaussian_test.is_from_null(0.1, np.copy(X), 0.5))

                # IMQ p value
                imq_test = QuadraticMultiple2(
                    MultiquadricQuadraticTest(grad_log_normal, beta=-0.5))
                imq_pvals.append(imq_test.is_from_null(0.1, np.copy(X), 0.5))

            print('d :', d)
            print('baringhaus :', rejection_rate(baringhaus_pvals))
            print('Stein  :', rejection_rate(stein_pvals))
            print('IMQ  :', rejection_rate(imq_pvals))
    # Fragment: the enclosing definition and the set-up of ``m``, ``X_train``,
    # ``y_train``, ``X_test`` and ``y_test`` are not visible here.
    m.optimize()
    
    # NOTE(review): ``res`` is not used anywhere in the visible code.
    res = 100
    pred_mean, pred_std = m.predict(X_test)
    # Predictive mean (solid) with a band of +/- 2 * pred_std (dashed).
    plt.plot(X_test, pred_mean, 'b-')
    plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
    plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
    # Training points in blue, test points in red.
    plt.plot(X_train, y_train, 'b.', markersize=3)
    plt.plot(X_test, y_test, 'r.', markersize=5)
    plt.grid(True)
    plt.xlabel(r"$X$")
    plt.ylabel(r"$y$")
    plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
    plt.show()
    
    # Statistic on the test responses, using externally computed gradients.
    s = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])
    
    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    
    # Histogram of the bootstrapped null with a vertical line at the statistic.
    # NOTE(review): distplot is deprecated in newer seaborn releases -- confirm
    # the seaborn version this is meant to run against.
    sns.distplot(null_samples, kde=False, norm_hist=True)
    plt.plot([stat, stat], [0, .012], 'black')
    plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"])
    plt.xlabel(r"$V_n$")
    plt.ylabel(r"Frequency")
    plt.savefig("gp_regression_bootstrap_hist.eps", bbox_inches='tight')
    
    plt.show()
Example #23
0
arr = []


def grad_log_pob(t):
    """Return ``sum(manual_grad(t[0], t[1], X), axis=0) + grad_log_prior(t)``.

    Defined once, ahead of its first use. It used to be defined inside the
    loop below, after ``GaussianSteinTest(grad_log_pob, 1)`` had already
    referenced the name.
    NOTE(review): presumably the gradient of the log-posterior of the
    two-parameter model; ``manual_grad``, ``X`` and ``grad_log_prior`` are
    defined outside this snippet -- confirm.
    """
    a = np.sum(manual_grad(t[0], t[1], X), axis=0) + grad_log_prior(t)
    return a


# NOTE(review): not referenced in the visible code; the literal 0.1 is passed
# to is_from_null below -- confirm whether they are meant to be the same value.
P_CHANGE = 0.1

me = GaussianSteinTest(grad_log_pob, 1)

for time in times_we_look_at:
    # Column ``time`` of ``samples``.
    chain_at_time = samples[:, time]

    # print(time)
    # pval = me.compute_pvalue(chain_at_time)
    # arr.append(pval)

    me = GaussianQuadraticTest(grad_log_pob)
    qm = QuadraticMultiple(me)

    reject, p = qm.is_from_null(0.05, chain_at_time, 0.1)

    print(reject)

# import matplotlib.pyplot as plt
#
# print(arr)
#
# plt.plot(arr)
#
# plt.show()
Example #24
0
# Monte-Carlo study: p-values of the quadratic test on thinned
# ``almost_t_student`` samples for several degrees of freedom.
# Each row of ``res`` is [degrees-of-freedom label, p-value].
dfs = range(1, 4, 2)
mc_reps = 100
res = np.empty((0, 2))

# ``p_change`` is the reciprocal of the block length N / log(N).
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)

for df in dfs:

    for mc in range(mc_reps):
        print(mc)
        # Generate 10 * N points and keep every 10th one.
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)

        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))

# Reference run: generated with df=100 but stored under the label ``inf``.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # ``np.inf`` replaces the ``np.Inf`` alias, which NumPy 2.0 removed.
    res = np.vstack((res, np.array([np.inf, pval])))

np.save('results.npy', res)
# Rows of ``arr`` are [c, min(p_val)] from QuadraticMultiple; rows of ``arr2``
# are [c, p_val2] from QuadraticMultiple2.
arr = np.empty((0, 2))

arr2 = np.empty((0, 2))
for c in [1.0, 1.3, 2.0, 3.0]:
    print('c', c)

    # Sampling target for this value of c.
    log_normal = logg(c)

    # 23 independent chains per value of c.
    for i in range(23):
        print(i)
        x = metropolis_hastings(log_normal,
                                chain_size=500,
                                thinning=15,
                                x_prev=np.random.randn(2))

        # NOTE(review): ``grad_log_dens`` does not depend on ``c`` in the
        # visible code -- presumably the fixed null model; confirm.
        me = GaussianQuadraticTest(grad_log_dens)
        qm = QuadraticMultiple(me)
        qm2 = QuadraticMultiple2(me)

        accept_null, p_val = qm.is_from_null(0.05, x, 0.1)
        p_val2 = qm2.is_from_null(0.05, x, 0.1)
        print(p_val2)
        # ``p_val`` is reduced with min() before it is stored.
        arr = np.vstack((arr, np.array([c, min(p_val)])))
        arr2 = np.vstack((arr2, np.array([c, p_val2])))

# Box plot of column 1 (p-value) grouped by column 0 (c).
df = DataFrame(arr)
pr = seaborn.boxplot(x=0, y=1, data=df)
# NOTE(review): ``seaborn.plt`` is not available in recent seaborn releases --
# confirm the seaborn version this script targets.
seaborn.plt.show()

df = DataFrame(arr2)
pr = seaborn.boxplot(x=0, y=1, data=df)
import numpy as np
from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results


if __name__ == '__main__':
    # Imported locally: ``os`` is used for ``result_fname`` below but is not
    # among the imports visible at the top of this script.
    import os

    D = 1
    N_test = 500
    N_fit = 50000
    # Values passed as ``m`` to KernelExpFiniteGaussian, one fit per value.
    ms_fit = np.array([1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])

    sigma = 1
    lmbda = 0.01

    def grad(x):
        # ``est`` is resolved at call time, so the test always differentiates
        # the estimator most recently bound in the loop below.
        return est.grad(np.array([x]))[0]

    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200

    # Result file named after this script, with a ``.txt`` extension.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"

    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            # NOTE(review): KernelExpFiniteGaussian is not imported in the
            # visible part of this script -- confirm where it comes from.
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)

            X = np.random.randn(N_fit, D)
            est.fit(X)

            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])
        
Example #27
0
from pandas import DataFrame
import seaborn
from stat_test.quadratic_time import GaussianQuadraticTest

__author__ = 'kcx'
import numpy as np


def grad_log_normal(x):
    """Return the negation of ``x``."""
    negated = -x
    return negated


np.random.seed(42)
me = GaussianQuadraticTest(grad_log_normal)

res = np.empty((0, 2))

# Fifty standard-normal samples per sample size; every row appended to ``res``
# is [sample size, statistic].
for sample_size in (75, 100):
    for i in range(50):
        data = np.random.randn(sample_size)
        _, s1 = me.get_statisitc(len(data), data)
        res = np.vstack((res, np.array([sample_size, s1])))

for i in range(50):
    data = np.random.randn(150)
    _, s1 = me.get_statisitc(len(data), data)
Example #28
0
from pandas import DataFrame
import seaborn
from stat_test.quadratic_time import GaussianQuadraticTest

__author__ = 'kcx'
import numpy as  np


def grad_log_normal(x):
    """Return the negation of ``x``."""
    negated = -x
    return negated


np.random.seed(42)
me = GaussianQuadraticTest(grad_log_normal)

res = np.empty((0, 2))

# Fifty standard-normal samples per sample size; every row appended to ``res``
# is [sample size, statistic].
for sample_size in (75, 100):
    for i in range(50):
        data = np.random.randn(sample_size)
        _, s1 = me.get_statisitc(len(data), data)
        res = np.vstack((res, np.array([sample_size, s1])))
Example #29
0

# estimate size of thinning
def get_thinning(X, nlags=50):
    """Return ``(thinning, autocorrelation)`` for the series ``X``.

    ``autocorrelation`` is ``acf(X, nlags=nlags, fft=True)``. ``thinning`` is
    one plus the index of the autocorrelation entry closest to 0.95.
    """
    autocorrelation = acf(X, nlags=nlags, fft=True)
    distance_to_target = np.abs(autocorrelation - 0.95)
    return np.argmin(distance_to_target) + 1, autocorrelation


#
# X = gen(TEST_CHAIN_SIZE, np.Inf)
# thinning, autocorr = get_thinning(X)
# print('thinning for AR normal simulation ', thinning, autocorr[thinning])

# Thinning is fixed to 1; the get_thinning-based estimate appears only in the
# commented-out lines above.
thinning = 1
# Test object built on ``grad_log_normal``.
tester = GaussianQuadraticTest(grad_log_normal)


def get_pval(X, tester, p_change):
    """Return ``tester``'s process p-value for the sample ``X``.

    The first element returned by ``tester.get_statistic_multiple(X)`` is
    passed, together with ``p_change``, to
    ``tester.compute_pvalues_for_processes``, whose result is returned.
    """
    first_returned, _ = tester.get_statistic_multiple(X)
    pvalue = tester.compute_pvalues_for_processes(first_returned, p_change)
    return pvalue


def get_pair(sample_size, df, thinning, tester, p_change):
    """Generate one sample with ``gen`` and return ``[df, p-value]``.

    The sample is ``gen(sample_size, df, thinning)`` and the p-value comes
    from ``get_pval`` with the given ``tester`` and ``p_change``.
    """
    sample = gen(sample_size, df, thinning)
    return [df, get_pval(sample, tester, p_change)]


# NOTE(review): presumably the ``p_change`` value handed to get_pair/get_pval;
# its use is not visible in this snippet -- confirm.
P_CHANGE = 0.1
# NOTE(review): presumably collects get_pair outputs further down -- confirm.
results = []
Example #30
0
# Monte-Carlo study: p-values of the quadratic test on thinned
# ``almost_t_student`` samples for several degrees of freedom.
# Each row of ``res`` is [degrees-of-freedom label, p-value].
dfs = range(1, 4, 2)
mc_reps = 100
res = np.empty((0, 2))

# ``p_change`` is the reciprocal of the block length N / log(N).
block = N / np.log(N)
p_change = 1.0 / block
print(p_change)

for df in dfs:

    for mc in range(mc_reps):
        print(mc)
        # Generate 10 * N points and keep every 10th one.
        X = almost_t_student(10 * N, df, 0.01)
        X = X[::10]
        me = GaussianQuadraticTest(grad_log_normal)
        U_stat, _ = me.get_statistic_multiple(X)

        pval = me.compute_pvalues_for_processes(U_stat, p_change)
        res = np.vstack((res, np.array([df, pval])))

# Reference run: generated with df=100 but stored under the label ``inf``.
for mc in range(mc_reps):
    X = almost_t_student(10 * N, 100, 0.01)
    X = X[::10]
    me = GaussianQuadraticTest(grad_log_normal)
    U_stat, _ = me.get_statistic_multiple(X)
    pval = me.compute_pvalues_for_processes(U_stat, p_change)
    # ``np.inf`` replaces the ``np.Inf`` alias, which NumPy 2.0 removed.
    res = np.vstack((res, np.array([np.inf, pval])))

np.save('results.npy', res)
import numpy as np
from stat_test.quadratic_time import GaussianQuadraticTest
from tools.tools import store_results

if __name__ == '__main__':
    # Imported locally: ``os`` is used for ``result_fname`` below but is not
    # among the imports visible at the top of this script.
    import os

    D = 1
    N_test = 500
    N_fit = 50000
    # Values passed as ``m`` to KernelExpFiniteGaussian, one fit per value.
    ms_fit = np.array(
        [1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])

    sigma = 1
    lmbda = 0.01

    def grad(x):
        # ``est`` is resolved at call time, so the test always differentiates
        # the estimator most recently bound in the loop below.
        return est.grad(np.array([x]))[0]

    s = GaussianQuadraticTest(grad)
    num_bootstrap = 200

    # Result file named after this script, with a ``.txt`` extension.
    result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"

    num_repetitions = 150
    for _ in range(num_repetitions):
        for m in ms_fit:
            # NOTE(review): KernelExpFiniteGaussian is not imported in the
            # visible part of this script -- confirm where it comes from.
            est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
            X_test = np.random.randn(N_test, D)

            X = np.random.randn(N_fit, D)
            est.fit(X)

            U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])