def testPdistExecution(self):
        """Execute ``distance.pdist`` and compare each result with SciPy.

        Covers a single-chunk input, a multi-chunk input (with and without
        ``aggregate_size``), and the metrics that take an extra tensor
        argument (``w``, ``V``, ``VI``). Every comparison is an exact
        array-equality check against ``scipy.spatial.distance.pdist``.
        """
        from scipy.spatial.distance import pdist as sp_pdist

        def run(dist_tensor):
            # Execute through the test executor and take the concatenated result.
            return self._executor.execute_tensor(dist_tensor, concat=True)[0]

        raw = np.random.rand(100, 10)

        # Callable metric shared by the single- and multi-chunk cases.
        # NOTE(review): deliberately left as a lambda -- SciPy appears to look
        # at a callable metric's ``__name__``, so naming it may change how the
        # input is validated; confirm before converting to a ``def``.
        custom_metric = lambda u, v: np.sqrt(((u - v)**2).sum())

        # test 1 chunk
        single = tensor(raw, chunk_size=100)
        for metric_kw in ({}, {'metric': 'hamming'}, {'metric': custom_metric}):
            actual = run(distance.pdist(single, **metric_kw))
            np.testing.assert_array_equal(actual, sp_pdist(raw, **metric_kw))

        # test more than 1 chunk
        # Each case: (kwargs for distance.pdist, kwargs for SciPy, expected
        # number of tiled chunks, or None when the tiling is not inspected).
        chunked = tensor(raw, chunk_size=12)
        multi_chunk_cases = [
            ({}, {}, 1),
            ({'aggregate_size': 3}, {}, 3),
            ({'metric': 'hamming', 'aggregate_size': 2},
             {'metric': 'hamming'}, 2),
            ({'metric': custom_metric, 'aggregate_size': 2},
             {'metric': custom_metric}, None),
        ]
        for tensor_kw, scipy_kw, n_chunks in multi_chunk_cases:
            dist = distance.pdist(chunked, **tensor_kw)
            if n_chunks is not None:
                self.assertEqual(len(dist.tiles().chunks), n_chunks)
            actual = run(dist)
            np.testing.assert_array_equal(actual, sp_pdist(raw, **scipy_kw))

        # Metrics needing an auxiliary array, passed as a chunked tensor on
        # the tensor side and as the plain ndarray on the SciPy side.
        # Each case: (metric, keyword name, ndarray shape, chunk size, extras).
        # NOTE(review): 'wminkowski' was deprecated and later removed from
        # SciPy (1.8, as far as I can tell) -- confirm the targeted version.
        aux_cases = [
            ('wminkowski', 'w', (10,), 7, {'p': 3}),
            ('seuclidean', 'V', (10,), 7, {}),
            ('mahalanobis', 'VI', (10, 10), 8, {}),
        ]
        for data in [tensor(raw), tensor(raw, chunk_size=12)]:
            for metric, kw_name, shape, chunk_size, extras in aux_cases:
                values = np.random.rand(*shape)
                aux = tensor(values, chunk_size=chunk_size)
                dist = distance.pdist(data, metric=metric, **extras,
                                      **{kw_name: aux})
                actual = run(dist)
                wanted = sp_pdist(raw, metric=metric, **extras,
                                  **{kw_name: values})
                np.testing.assert_array_equal(actual, wanted)
Exemple #2
0
def test_pdist():
    """Check ``distance.pdist`` result shapes, tiling and argument validation.

    First verifies the condensed output shape and the chunk layout after
    tiling for a single-chunk and a multi-chunk input, then verifies that
    invalid inputs raise the expected exception types.
    """
    n_rows = 100
    raw = np.random.rand(n_rows, 10)
    condensed_len = n_rows * (n_rows - 1) // 2

    # Each case: (input chunk size, extra pdist kwargs, expected chunk count).
    tiling_cases = (
        (100, {}, 1),                      # test 1 chunk
        (15, {'aggregate_size': 2}, 2),    # test multiple chunks
    )
    for chunk_size, extra_kw, n_chunks in tiling_cases:
        dist = distance.pdist(tensor(raw, chunk_size=chunk_size), **extra_kw)
        assert dist.shape == (condensed_len,)

        tiled = tile(dist)
        assert len(tiled.chunks) == n_chunks
        for chunk in tiled.chunks:
            # Result chunks are expected to split the output evenly.
            assert chunk.shape == (tiled.shape[0] // n_chunks,)

    sparse_input = tensor(sps.csr_matrix(np.zeros((4, 3))))

    # Each entry pairs the expected exception with a thunk performing the
    # call, so that argument construction stays inside ``pytest.raises``.
    failing_calls = [
        # X cannot be sparse
        (ValueError, lambda: distance.pdist(sparse_input)),
        # X can only be 2-d
        (ValueError, lambda: distance.pdist(np.random.rand(3, 3, 3))),
        # out type wrong
        (TypeError, lambda: distance.pdist(np.random.rand(3, 3), out=2)),
        # out shape wrong
        (ValueError, lambda: distance.pdist(
            np.random.rand(3, 3), out=tensor(np.random.rand(2)))),
        # out dtype wrong
        (ValueError, lambda: distance.pdist(
            np.random.rand(3, 3),
            out=tensor(np.random.randint(2, size=(3,))))),
        # test extra param
        (TypeError, lambda: distance.pdist(
            np.random.rand(3, 3), unknown_kw='unknown_kw')),
    ]
    for expected_error, call in failing_calls:
        with pytest.raises(expected_error):
            call()
Exemple #3
0
    def testPdist(self):
        """Check ``distance.pdist`` shapes, tiling and argument validation.

        Verifies the condensed output shape and the chunk layout after
        ``tiles()`` for one and for several input chunks, then verifies that
        invalid arguments raise the expected exception types.
        """
        dense = np.random.rand(100, 10)
        condensed_shape = (100 * 99 // 2, )

        def check_tiling(chunk_size, expected_chunks, **pdist_kwargs):
            # Build the distance tensor, then inspect it before and after tiling.
            dist = distance.pdist(tensor(dense, chunk_size=chunk_size),
                                  **pdist_kwargs)
            self.assertEqual(dist.shape, condensed_shape)

            tiled = dist.tiles()
            self.assertEqual(len(tiled.chunks), expected_chunks)
            per_chunk_shape = (tiled.shape[0] // expected_chunks, )
            for chunk in tiled.chunks:
                self.assertEqual(chunk.shape, per_chunk_shape)

        # test 1 chunk
        check_tiling(100, 1)
        # test multiple chunks
        check_tiling(15, 2, aggregate_size=2)

        # X cannot be sparse
        sparse_input = tensor(sps.csr_matrix(np.zeros((4, 3))))
        self.assertRaises(ValueError, distance.pdist, sparse_input)

        # The remaining cases are wrapped in lambdas so their arguments are
        # built inside the guarded call.

        # X can only be 2-d
        self.assertRaises(
            ValueError, lambda: distance.pdist(np.random.rand(3, 3, 3)))

        # out type wrong
        self.assertRaises(
            TypeError, lambda: distance.pdist(np.random.rand(3, 3), out=2))

        # out shape wrong
        self.assertRaises(
            ValueError,
            lambda: distance.pdist(np.random.rand(3, 3),
                                   out=tensor(np.random.rand(2))))

        # out dtype wrong
        self.assertRaises(
            ValueError,
            lambda: distance.pdist(np.random.rand(3, 3),
                                   out=tensor(np.random.randint(2, size=(3, )))))

        # test extra param
        self.assertRaises(
            TypeError,
            lambda: distance.pdist(np.random.rand(3, 3),
                                   unknown_kw='unknown_kw'))