def test_1D_ks_2samp(self):
    """Check the 1-D statistic against ``scipy.stats.ks_2samp``."""
    shifted = np.random.randn(50) + 1
    reference = np.random.randn(50)
    # Only the statistic is compared; the p-value returned by scipy is unused.
    expected, _ = stats.ks_2samp(shifted, reference)
    aaeq(dd.kolmogorov_smirnov(shifted, reference), expected, 3)
def test_mvnormal(self):
    """Compare the results to the figure 2 in the paper.

    Both samples are drawn after seeding the global NumPy generator so the
    test is reproducible from run to run.
    """
    from numpy.random import normal, multivariate_normal

    n_samples = 30000
    # Seed *before* the first draw: seeding only between the two draws left
    # ``p`` dependent on whatever random state earlier tests produced.
    np.random.seed(1)
    p = normal(0, 1, size=(n_samples, 2))
    q = multivariate_normal(
        [.5, -.5], [[.5, .1], [.1, .3]], size=n_samples
    )
    # The divergence is not symmetric, so both directions are checked.
    aaeq(dd.kldiv(p, q), 1.39, 1)
    aaeq(dd.kldiv(q, p), 0.62, 1)
def test_mvnormal(self):
    """Compare the results to the figure 2 in the paper.

    The global NumPy generator is seeded before any sample is drawn, which
    makes both ``p`` and ``q`` reproducible.
    """
    from numpy.random import normal, multivariate_normal

    n = 30000
    # The seed must precede the draw of ``p`` as well as ``q``; otherwise
    # ``p`` varies between runs and the test is not deterministic.
    np.random.seed(1)
    p = normal(0, 1, size=(n, 2))
    q = multivariate_normal([.5, -.5], [[.5, .1], [.1, .3]], size=n)
    # Check both argument orders, since they have different expected values.
    aaeq(dd.kldiv(p, q), 1.39, 1)
    aaeq(dd.kldiv(q, p), 0.62, 1)
def test_simple(self):
    """Friedman-Rafsky on a hand-checked 7-point configuration."""
    # Over these 7 points, there are 2 edges joining points of the same
    # sample: [1,2]-[2,2] and [3,2]-[4,2].
    #
    #   4 |    y
    #   3 |
    #   2 | x  x  y  y
    #   1 | y     x
    #     +-------------
    #       1  2  3  4
    x = np.array([[1, 2], [2, 2], [3, 1]])
    y = np.array([[1, 1], [2, 4], [3, 2], [4, 2]])
    expected = 2.0 / 7
    aaeq(dd.friedman_rafsky(x, y), expected, 3)
def test_against_analytic(self):
    """Compare the sampled kldiv estimate with the analytical value."""
    p = stats.norm(2, 1)
    q = stats.norm(2.6, 1.4)
    expected = analytical_KLDiv(p, q)

    n_samples = 10000
    np.random.seed(2)
    estimate = dd.kldiv(p.rvs(n_samples), q.rvs(n_samples))
    aaeq(estimate, expected, 1)
def test_against_analytic(self):
    """Compare the sampled kldiv estimate with the analytical value.

    Two normal distributions are sampled after seeding the global NumPy
    generator, and the estimate from those samples is checked against
    ``analytical_KLDiv`` for the same pair of distributions.
    """
    p = stats.norm(2, 1)
    q = stats.norm(2.6, 1.4)
    ra = analytical_KLDiv(p, q)

    n_samples = 10000
    np.random.seed(2)
    x, y = p.rvs(n_samples), q.rvs(n_samples)
    # Use the samples drawn above instead of discarding them and drawing a
    # second, redundant pair.
    re = dd.kldiv(x, y)
    aaeq(re, ra, 1)
def test_simple(self):
    """seuclidean should give hypot(1, 2) for both candidate settings."""
    d = 2
    n, m = 25, 30
    expected = np.hypot(1, 2)
    # Variance of the candidate sample does not affect answer, so the same
    # expectation is checked with the second randn argument set to 1 and 2.
    for candidate_std in (1, 2):
        x = randn(0, 1, (n, d))
        y = randn([1, 2], candidate_std, (m, d))
        aaeq(dd.seuclidean(x, y), expected, 2)
def check_different_sample_size(self):
    """Mean kldiv estimate should match the analytical value for all sizes."""
    p = stats.norm(2, 1)
    q = stats.norm(2.6, 1.4)
    expected = analytical_KLDiv(p, q)

    n = 6000
    # First pair: same sample size for x and y; then different sample sizes.
    size_pairs = ((n, n), (n * 2, n), (n, n * 2))
    for n_p, n_q in size_pairs:
        estimates = [dd.kldiv(p.rvs(n_p), q.rvs(n_q)) for _ in range(30)]
        aaeq(np.mean(estimates), expected, 2)
def check_different_sample_size(self):
    """Average 30 kldiv estimates per size pair and compare to analytic."""
    p = stats.norm(2, 1)
    q = stats.norm(2.6, 1.4)
    analytic = analytical_KLDiv(p, q)
    base = 6000

    def mean_estimate(size_p, size_q):
        # Mean over 30 independent draws with the requested sample sizes.
        return np.mean(
            [dd.kldiv(p.rvs(size_p), q.rvs(size_q)) for _ in range(30)]
        )

    # Same sample size for x and y
    aaeq(mean_estimate(base, base), analytic, 2)
    # Different sample sizes
    aaeq(mean_estimate(base * 2, base), analytic, 2)
    aaeq(mean_estimate(base, base * 2), analytic, 2)
def test_simple(self):
    """nearest_neighbor on identical, mixed and well-separated samples."""
    d = 2
    n, m = 200, 200
    np.random.seed(1)
    x = np.random.randn(n, d)
    y = np.random.randn(m, d)

    # (first sample, second sample, expected value, precision passed to aaeq)
    cases = (
        (x + .001, x, 0, 2),   # Almost identical samples
        (x, y, 0.5, 1),        # Same distribution but mixed
        (x + 10, y, 1, 2),     # Two completely different distributions
    )
    for first, second, expected, precision in cases:
        aaeq(dd.nearest_neighbor(first, second), expected, precision)
def test_compare_with_matlab(self):
    """seuclidean on the MATLAB sample should reproduce 2.8463."""
    sample_x, sample_y = matlab_sample()
    aaeq(dd.seuclidean(sample_x, sample_y), 2.8463, 4)
def test_compare_with_matlab(self):
    """friedman_rafsky on the MATLAB sample should reproduce 0.96667."""
    sample_x, sample_y = matlab_sample()
    aaeq(dd.friedman_rafsky(sample_x, sample_y), 0.96667, 4)
def check_accuracy(self):
    """Mean of the first two accuracy_vs_kth entries should be close to 0."""
    # The second returned value is not needed for this check.
    m, _ = self.accuracy_vs_kth(N=500, trials=300)
    leading = m[0:2]
    aaeq(np.mean(leading), 0, 2)
def test_compare_with_matlab(self):
    """kolmogorov_smirnov on the MATLAB sample should reproduce 0.96667."""
    sample_x, sample_y = matlab_sample()
    aaeq(dd.kolmogorov_smirnov(sample_x, sample_y), 0.96667, 4)
def check_accuracy(self):
    """Mean of the first two accuracy_vs_kth entries should be close to 0."""
    result = self.accuracy_vs_kth(n=500, trials=300)
    # Only the first of the two returned values is checked.
    means, _ = result
    aaeq(np.mean(means[0:2]), 0, 2)
def test_randn():
    """The randn helper should hit the requested column means and stds."""
    mu = [2, 3]
    std = [1, 2]
    sample = randn(mu, std, [10, 2])
    # Statistics are taken along axis 0, i.e. one value per column.
    aaeq(sample.mean(axis=0), mu)
    aaeq(sample.std(axis=0, ddof=1), std)
def test_compare_with_matlab(self):
    """nearest_neighbor on the MATLAB sample should reproduce 1."""
    sample_x, sample_y = matlab_sample()
    aaeq(dd.nearest_neighbor(sample_x, sample_y), 1, 4)
def test_compare_with_matlab(self):
    """zech_aslan on the MATLAB sample should reproduce 0.77802."""
    sample_x, sample_y = matlab_sample()
    aaeq(dd.zech_aslan(sample_x, sample_y), 0.77802, 4)