    def test_1D_ks_2samp(self):
        # Compare with scipy.stats.ks_2samp; only the KS statistic is
        # checked, the p-value returned by scipy is not used.
        x = np.random.randn(50) + 1
        y = np.random.randn(50)
        s, p = stats.ks_2samp(x, y)
        dm = dd.kolmogorov_smirnov(x, y)
        aaeq(dm, s, 3)
    def test_mvnormal(self):
        """Compare the results to the figure 2 in the paper."""
        from numpy.random import normal, multivariate_normal

        N = 30000
        p = normal(0, 1, size=(N, 2))
        np.random.seed(1)
        q = multivariate_normal([.5, -.5], [[.5, .1], [.1, .3]], size=N)

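        # kldiv is asymmetric, so both orderings are checked against the
        # reference values read off figure 2 of the paper.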
        aaeq(dd.kldiv(p, q), 1.39, 1)
        aaeq(dd.kldiv(q, p), 0.62, 1)
    def test_simple(self):
        # Over these 7 points, there are 2 edges linking points from the same sample:
        # [1,2]-[2,2] & [3,2]-[4,2]
        # |
        # |   x
        # | o o x x
        # | x  o
        # |_ _ _ _ _ _ _
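        # friedman_rafsky presumably builds a minimal spanning tree over the
        # pooled sample and counts the edges joining points from the same
        # sample, normalized here by the total number of points (2 / 7).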
        x = np.array([[1, 2], [2, 2], [3, 1]])
        y = np.array([[1, 1], [2, 4], [3, 2], [4, 2]])

        dm = dd.friedman_rafsky(x, y)
        aaeq(dm, 2. / 7, 3)
    def test_against_analytic(self):
        p = stats.norm(2, 1)
        q = stats.norm(2.6, 1.4)

        ra = analytical_KLDiv(p, q)
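        # analytical_KLDiv presumably evaluates the closed-form KL divergence
        # between the two frozen normal distributions.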

        N = 10000
        np.random.seed(2)
        x, y = p.rvs(N), q.rvs(N)

        re = dd.kldiv(x, y)

        aaeq(re, ra, 1)
    def test_simple(self):
        d = 2
        n, m = 25, 30

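        # The expected value is the Euclidean distance between the sample
        # means, (0, 0) and (1, 2); seuclidean presumably standardizes by the
        # reference (first) sample's variance, which is 1 here.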
        x = randn(0, 1, (n, d))
        y = randn([1, 2], 1, (m, d))
        dm = dd.seuclidean(x, y)
        aaeq(dm, np.hypot(1, 2), 2)

        # The variance of the candidate sample does not affect the answer.
        x = randn(0, 1, (n, d))
        y = randn([1, 2], 2, (m, d))
        dm = dd.seuclidean(x, y)
        aaeq(dm, np.hypot(1, 2), 2)
    def check_different_sample_size(self):
        p = stats.norm(2, 1)
        q = stats.norm(2.6, 1.4)

        ra = analytical_KLDiv(p, q)

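        # A single estimate at this sample size is noisy, so the divergence is
        # averaged over 30 independent draws before comparing to the
        # analytical value.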
        n = 6000
        # Same sample size for x and y
        re = [dd.kldiv(p.rvs(n), q.rvs(n)) for _ in range(30)]
        aaeq(np.mean(re), ra, 2)

        # Different sample sizes
        re = [dd.kldiv(p.rvs(n * 2), q.rvs(n)) for _ in range(30)]
        aaeq(np.mean(re), ra, 2)

        re = [dd.kldiv(p.rvs(n), q.rvs(n * 2)) for _ in range(30)]
        aaeq(np.mean(re), ra, 2)
    def test_simple(self):
        d = 2
        n, m = 200, 200
        np.random.seed(1)
        x = np.random.randn(n, d)
        y = np.random.randn(m, d)

        # Almost identical samples
        dm = dd.nearest_neighbor(x + .001, x)
        aaeq(dm, 0, 2)

        # Same distribution but mixed
        dm = dd.nearest_neighbor(x, y)
        aaeq(dm, 0.5, 1)

        # Two completely different distributions
        dm = dd.nearest_neighbor(x + 10, y)
        aaeq(dm, 1, 2)
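    # The test_compare_with_matlab methods below score the pair returned by
    # matlab_sample(), presumably the same fixed samples used by a reference
    # MATLAB implementation; the expected values are hard-coded from its
    # output.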
    def test_compare_with_matlab(self):
        x, y = matlab_sample()
        dm = dd.seuclidean(x, y)
        aaeq(dm, 2.8463, 4)
    def test_compare_with_matlab(self):
        x, y = matlab_sample()
        dm = dd.friedman_rafsky(x, y)
        aaeq(dm, 0.96667, 4)
    def test_compare_with_matlab(self):
        x, y = matlab_sample()
        dm = dd.kolmogorov_smirnov(x, y)
        aaeq(dm, 0.96667, 4)
    def check_accuracy(self):
        # accuracy_vs_kth presumably returns the mean error of the kldiv
        # estimate as a function of k, the nearest-neighbour order; the
        # estimate is expected to be unbiased for the two smallest k.
        m, _ = self.accuracy_vs_kth(n=500, trials=300)
        aaeq(np.mean(m[0:2]), 0, 2)
def test_randn():
    mu, std = [2, 3], [1, 2]
    r = randn(mu, std, [10, 2])
    aaeq(r.mean(0), mu)
    aaeq(r.std(0, ddof=1), std)
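

# A minimal sketch (an assumption, not the module's actual helper) of the
# behaviour test_randn relies on: `randn` apparently draws normal deviates and
# then re-centres and rescales them so the empirical mean and std match the
# requested values exactly; otherwise 10 raw samples could not pass the
# default-precision checks above.
def _randn_sketch(mu, std, shape):
    r = np.random.randn(*shape)
    r = (r - r.mean(0)) / r.std(0, ddof=1)  # standardize each column exactly
    return np.asarray(mu) + np.asarray(std) * r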
    def test_compare_with_matlab(self):
        x, y = matlab_sample()
        dm = dd.nearest_neighbor(x, y)
        aaeq(dm, 1, 4)
    def test_compare_with_matlab(self):
        x, y = matlab_sample()
        dm = dd.zech_aslan(x, y)
        aaeq(dm, 0.77802, 4)
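

# A minimal sketch (an assumption, not the module's actual helper) of the
# closed-form value that analytical_KLDiv is expected to return for two frozen
# scipy.stats.norm distributions, in nats:
# KL(p || q) = ln(s_q / s_p) + (s_p**2 + (m_p - m_q)**2) / (2 * s_q**2) - 1/2
def _analytical_kldiv_normals_sketch(p, q):
    m1, s1 = p.mean(), p.std()
    m2, s2 = q.mean(), q.std()
    return np.log(s2 / s1) + (s1 ** 2 + (m1 - m2) ** 2) / (2 * s2 ** 2) - 0.5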