Esempio n. 1
0
    def testCdistExecution(self):
        """Compare executed ``distance.cdist`` results with SciPy's ``cdist``.

        Two random inputs of shape (100, 10) and (89, 10) are wrapped as
        tensors with several chunk layouts.  For each layout the result
        obtained through ``self._executor`` must equal
        ``scipy.spatial.distance.cdist`` on the raw arrays for the default
        metric, a string metric, a callable metric, and metrics taking the
        extra ``w`` / ``V`` / ``VI`` arguments.
        """
        from scipy.spatial.distance import cdist as sp_cdist

        raw_a = np.random.rand(100, 10)
        raw_b = np.random.rand(89, 10)

        def euclid(u, v):
            # Callable metric handed to both implementations.
            return np.sqrt(((u - v)**2).sum())

        def assert_matches_scipy(left, right, scipy_overrides=None, **kwargs):
            # Run the tensor version first, then the SciPy reference.
            # ``scipy_overrides`` swaps tensor-valued keyword arguments for
            # their raw ndarray counterparts in the reference call.
            actual = self._executor.execute_tensor(
                distance.cdist(left, right, **kwargs), concat=True)[0]
            reference_kwargs = dict(kwargs)
            if scipy_overrides:
                reference_kwargs.update(scipy_overrides)
            reference = sp_cdist(raw_a, raw_b, **reference_kwargs)
            np.testing.assert_array_equal(actual, reference)

        # First layout: a single chunk; second layout: more than one chunk.
        for size_a, size_b in [(100, 100), (12, 13)]:
            xa = tensor(raw_a, chunk_size=size_a)
            xb = tensor(raw_b, chunk_size=size_b)

            assert_matches_scipy(xa, xb)
            assert_matches_scipy(xa, xb, metric='hamming')
            assert_matches_scipy(xa, xb, metric=euclid)

        # Extra-argument metrics, with default and explicit chunk sizes.
        layouts = [
            (tensor(raw_a), tensor(raw_b)),
            (tensor(raw_a, chunk_size=12), tensor(raw_b, chunk_size=13)),
        ]
        for xa, xb in layouts:
            # test w
            weight = np.random.rand(10)
            assert_matches_scipy(xa, xb, {'w': weight},
                                 metric='wminkowski', p=3,
                                 w=tensor(weight, chunk_size=7))

            # test V
            variance = np.random.rand(10)
            assert_matches_scipy(xa, xb, {'V': variance},
                                 metric='seuclidean',
                                 V=tensor(variance, chunk_size=7))

            # test VI
            inv_cov = np.random.rand(10, 10)
            assert_matches_scipy(xa, xb, {'VI': inv_cov},
                                 metric='mahalanobis',
                                 VI=tensor(inv_cov, chunk_size=8))
Esempio n. 2
0
def test_cdist():
    """Check result metadata and argument validation of ``distance.cdist``.

    Covers the result shape and chunk layout after tiling (single-chunk and
    multi-chunk inputs), followed by the error raised for each kind of
    invalid argument.
    """
    lhs_data = np.random.rand(100, 10)
    rhs_data = np.random.rand(90, 10)
    full_shape = (100, 90)

    # Single chunk per input: tiling yields one chunk covering the result.
    lhs = tensor(lhs_data, chunk_size=100)
    rhs = tensor(rhs_data, chunk_size=100)
    result = distance.cdist(lhs, rhs)
    assert result.shape == full_shape

    result = tile(result)
    assert len(result.chunks) == 1
    assert all(chunk.shape == result.shape for chunk in result.chunks)

    # Multiple chunks per input.
    lhs = tensor(lhs_data, chunk_size=15)
    rhs = tensor(rhs_data, chunk_size=16)
    result = distance.cdist(lhs, rhs)
    assert result.shape == full_shape

    tiled_lhs, tiled_rhs, result = tile(lhs, rhs, result)
    row_blocks = 100 // 15 + 1
    col_blocks = 90 // 16 + 1
    assert len(result.chunks) == row_blocks * col_blocks
    assert result.nsplits == (tiled_lhs.nsplits[0], tiled_rhs.nsplits[0])
    for chunk in result.chunks:
        # Each output chunk takes its row count from the matching chunk of
        # the first input and its column count from the second input.
        expected_rows = tiled_lhs.cix[chunk.index[0], 0].shape[0]
        expected_cols = tiled_rhs.cix[chunk.index[1], 0].shape[0]
        assert chunk.shape == (expected_rows, expected_cols)

    # A 3-d XA is rejected.
    with pytest.raises(ValueError):
        distance.cdist(np.random.rand(3, 3, 3), np.random.rand(3, 3))

    # A 3-d XB is rejected.
    with pytest.raises(ValueError):
        distance.cdist(np.random.rand(3, 3), np.random.rand(3, 3, 3))

    # A sparse XA is rejected.
    sparse_lhs = tensor(sps.csr_matrix(np.zeros((4, 3))))
    with pytest.raises(ValueError):
        distance.cdist(sparse_lhs, np.random.rand(10, 3))

    # A sparse XB is rejected.
    sparse_rhs = tensor(sps.csr_matrix(np.zeros((4, 3))))
    with pytest.raises(ValueError):
        distance.cdist(np.random.rand(10, 3), sparse_rhs)

    # `out` of the wrong type.
    with pytest.raises(TypeError):
        distance.cdist(lhs_data, rhs_data, out=2)

    # `out` of the wrong shape: (100, 91) instead of (100, 90).
    with pytest.raises(ValueError):
        bad_shape_out = tensor(np.random.rand(100, 91))
        distance.cdist(lhs_data, rhs_data, out=bad_shape_out)

    # `out` of the wrong dtype: integer values.
    with pytest.raises(ValueError):
        int_out = tensor(np.random.randint(2, size=(100, 90)))
        distance.cdist(lhs_data, rhs_data, out=int_out)

    # An unknown keyword argument is rejected.
    with pytest.raises(TypeError):
        distance.cdist(lhs_data, rhs_data, unknown_kw='unknown_kw')
Esempio n. 3
0
    def testCdist(self):
        """Check result metadata and argument validation of ``distance.cdist``.

        Covers the result shape and chunk layout after tiling (single-chunk
        and multi-chunk inputs), followed by the error raised for each kind
        of invalid argument.
        """
        lhs_data = np.random.rand(100, 10)
        rhs_data = np.random.rand(90, 10)
        full_shape = (100, 90)

        # Single chunk per input: tiling yields one chunk covering the result.
        lhs = tensor(lhs_data, chunk_size=100)
        rhs = tensor(rhs_data, chunk_size=100)
        result = distance.cdist(lhs, rhs)
        self.assertEqual(result.shape, full_shape)

        result = result.tiles()
        self.assertEqual(len(result.chunks), 1)
        for chunk in result.chunks:
            self.assertEqual(chunk.shape, result.shape)

        # Multiple chunks per input.
        lhs = tensor(lhs_data, chunk_size=15)
        rhs = tensor(rhs_data, chunk_size=16)
        result = distance.cdist(lhs, rhs)
        self.assertEqual(result.shape, full_shape)

        result = result.tiles()
        row_blocks = 100 // 15 + 1
        col_blocks = 90 // 16 + 1
        self.assertEqual(len(result.chunks), row_blocks * col_blocks)

        # Fetch the tiled inputs once; they are reused for every chunk below.
        tiled_lhs = get_tiled(lhs)
        tiled_rhs = get_tiled(rhs)
        self.assertEqual(result.nsplits,
                         (tiled_lhs.nsplits[0], tiled_rhs.nsplits[0]))
        for chunk in result.chunks:
            # Each output chunk takes its row count from the matching chunk
            # of the first input and its column count from the second input.
            expected_rows = tiled_lhs.cix[chunk.index[0], 0].shape[0]
            expected_cols = tiled_rhs.cix[chunk.index[1], 0].shape[0]
            self.assertEqual(chunk.shape, (expected_rows, expected_cols))

        # A 3-d XA is rejected.
        with self.assertRaises(ValueError):
            distance.cdist(np.random.rand(3, 3, 3), np.random.rand(3, 3))

        # A 3-d XB is rejected.
        with self.assertRaises(ValueError):
            distance.cdist(np.random.rand(3, 3), np.random.rand(3, 3, 3))

        # A sparse XA is rejected.
        sparse_lhs = tensor(sps.csr_matrix(np.zeros((4, 3))))
        with self.assertRaises(ValueError):
            distance.cdist(sparse_lhs, np.random.rand(10, 3))

        # A sparse XB is rejected.
        sparse_rhs = tensor(sps.csr_matrix(np.zeros((4, 3))))
        with self.assertRaises(ValueError):
            distance.cdist(np.random.rand(10, 3), sparse_rhs)

        # `out` of the wrong type.
        with self.assertRaises(TypeError):
            distance.cdist(lhs_data, rhs_data, out=2)

        # `out` of the wrong shape: (100, 91) instead of (100, 90).
        with self.assertRaises(ValueError):
            bad_shape_out = tensor(np.random.rand(100, 91))
            distance.cdist(lhs_data, rhs_data, out=bad_shape_out)

        # `out` of the wrong dtype: integer values.
        with self.assertRaises(ValueError):
            int_out = tensor(np.random.randint(2, size=(100, 90)))
            distance.cdist(lhs_data, rhs_data, out=int_out)

        # An unknown keyword argument is rejected.
        with self.assertRaises(TypeError):
            distance.cdist(lhs_data, rhs_data, unknown_kw='unknown_kw')