def testCdistExecution(self):
    """Compare executed ``distance.cdist`` results with SciPy's ``cdist``.

    Covers single-chunk and multi-chunk operands with the default metric,
    ``'hamming'`` and a callable metric, then metrics that take an extra
    array argument (``w``, ``V``, ``VI``) supplied as a chunked tensor.
    """
    from scipy.spatial.distance import cdist as sp_cdist

    raw_a = np.random.rand(100, 10)
    raw_b = np.random.rand(89, 10)

    def check(xa, xb, tensor_kwargs, scipy_kwargs):
        # Build and execute the tensor graph first, then compute the SciPy
        # reference on the raw arrays and require an exact match.
        dist = distance.cdist(xa, xb, **tensor_kwargs)
        result = self._executor.execute_tensor(dist, concat=True)[0]
        expected = sp_cdist(raw_a, raw_b, **scipy_kwargs)
        np.testing.assert_array_equal(result, expected)

    # Keyword sets shared verbatim by both implementations: default metric,
    # a named metric, and a callable metric (the same callable object is
    # handed to both sides).
    basic_metrics = [
        {},
        {'metric': 'hamming'},
        {'metric': lambda u, v: np.sqrt(((u - v)**2).sum())},
    ]

    # (100, 100) keeps each operand in one chunk; (12, 13) splits both.
    for chunk_a, chunk_b in [(100, 100), (12, 13)]:
        xa = tensor(raw_a, chunk_size=chunk_a)
        xb = tensor(raw_b, chunk_size=chunk_b)
        for kwargs in basic_metrics:
            check(xa, xb, kwargs, kwargs)

    operand_pairs = [
        (tensor(raw_a), tensor(raw_b)),
        (tensor(raw_a, chunk_size=12), tensor(raw_b, chunk_size=13)),
    ]
    # (metric, extra kwargs, array-argument name, array shape, chunk size)
    array_param_cases = [
        ('wminkowski', {'p': 3}, 'w', (10,), 7),
        ('seuclidean', {}, 'V', (10,), 7),
        ('mahalanobis', {}, 'VI', (10, 10), 8),
    ]
    for xa, xb in operand_pairs:
        for metric, extra, param_name, shape, chunk_size in array_param_cases:
            raw_param = np.random.rand(*shape)
            param = tensor(raw_param, chunk_size=chunk_size)
            common = dict(metric=metric, **extra)
            # The tensor side receives the chunked tensor; SciPy receives
            # the ndarray it was built from.
            check(xa, xb,
                  dict(common, **{param_name: param}),
                  dict(common, **{param_name: raw_param}))
def test_cdist():
    """Check shape/chunk metadata and argument validation of ``distance.cdist``."""
    n_a, n_b = 100, 90
    raw_a = np.random.rand(n_a, 10)
    raw_b = np.random.rand(n_b, 10)

    # Single chunk: the chunk size covers each operand entirely.
    dist = distance.cdist(tensor(raw_a, chunk_size=100),
                          tensor(raw_b, chunk_size=100))
    assert dist.shape == (n_a, n_b)
    dist = tile(dist)
    assert len(dist.chunks) == 1
    for chunk in dist.chunks:
        assert chunk.shape == dist.shape

    # Multiple chunks along both operands.
    size_a, size_b = 15, 16
    a = tensor(raw_a, chunk_size=size_a)
    b = tensor(raw_b, chunk_size=size_b)
    dist = distance.cdist(a, b)
    assert dist.shape == (n_a, n_b)
    ta, tb, dist = tile(a, b, dist)
    # Neither 100/15 nor 90/16 divides evenly, hence the "+ 1" per axis.
    expected_chunks = (n_a // size_a + 1) * (n_b // size_b + 1)
    assert len(dist.chunks) == expected_chunks
    assert dist.nsplits == (ta.nsplits[0], tb.nsplits[0])
    for chunk in dist.chunks:
        row_block = ta.cix[chunk.index[0], 0]
        col_block = tb.cix[chunk.index[1], 0]
        assert chunk.shape == (row_block.shape[0], col_block.shape[0])

    # Sparse operands used by the rejection checks below.
    sparse_a = tensor(sps.csr_matrix(np.zeros((4, 3))))
    sparse_b = tensor(sps.csr_matrix(np.zeros((4, 3))))

    # Each entry: (expected exception, deferred call). The call is deferred
    # so that it runs, arguments included, inside ``pytest.raises``.
    invalid_calls = [
        # XA can only be 2-d
        (ValueError, lambda: distance.cdist(np.random.rand(3, 3, 3),
                                            np.random.rand(3, 3))),
        # XB can only be 2-d
        (ValueError, lambda: distance.cdist(np.random.rand(3, 3),
                                            np.random.rand(3, 3, 3))),
        # XA cannot be sparse
        (ValueError, lambda: distance.cdist(sparse_a, np.random.rand(10, 3))),
        # XB cannot be sparse
        (ValueError, lambda: distance.cdist(np.random.rand(10, 3), sparse_b)),
        # out has the wrong type
        (TypeError, lambda: distance.cdist(raw_a, raw_b, out=2)),
        # out has the wrong shape
        (ValueError, lambda: distance.cdist(
            raw_a, raw_b, out=tensor(np.random.rand(100, 91)))),
        # out has the wrong dtype
        (ValueError, lambda: distance.cdist(
            raw_a, raw_b, out=tensor(np.random.randint(2, size=(100, 90))))),
        # unknown extra keyword argument
        (TypeError, lambda: distance.cdist(raw_a, raw_b,
                                           unknown_kw='unknown_kw')),
    ]
    for expected_error, call in invalid_calls:
        with pytest.raises(expected_error):
            call()
def testCdist(self):
    """Check shape/chunk metadata and argument validation of ``distance.cdist``."""
    n_a, n_b = 100, 90
    raw_a = np.random.rand(n_a, 10)
    raw_b = np.random.rand(n_b, 10)

    # Single chunk: the chunk size covers each operand entirely.
    whole_a = tensor(raw_a, chunk_size=100)
    whole_b = tensor(raw_b, chunk_size=100)
    dist = distance.cdist(whole_a, whole_b)
    self.assertEqual(dist.shape, (n_a, n_b))
    dist = dist.tiles()
    self.assertEqual(len(dist.chunks), 1)
    for chunk in dist.chunks:
        self.assertEqual(chunk.shape, dist.shape)

    # Multiple chunks along both operands.
    size_a, size_b = 15, 16
    a = tensor(raw_a, chunk_size=size_a)
    b = tensor(raw_b, chunk_size=size_b)
    dist = distance.cdist(a, b)
    self.assertEqual(dist.shape, (n_a, n_b))
    dist = dist.tiles()
    # Neither 100/15 nor 90/16 divides evenly, hence the "+ 1" per axis.
    expected_chunks = (n_a // size_a + 1) * (n_b // size_b + 1)
    self.assertEqual(len(dist.chunks), expected_chunks)
    self.assertEqual(dist.nsplits,
                     (get_tiled(a).nsplits[0], get_tiled(b).nsplits[0]))
    for chunk in dist.chunks:
        # Each output chunk takes its row count from the matching chunk of
        # the first operand and its column count from the second.
        row_block = get_tiled(a).cix[chunk.index[0], 0]
        col_block = get_tiled(b).cix[chunk.index[1], 0]
        self.assertEqual(chunk.shape,
                         (row_block.shape[0], col_block.shape[0]))

    # XA can only be 2-d
    with self.assertRaises(ValueError):
        distance.cdist(np.random.rand(3, 3, 3), np.random.rand(3, 3))

    # XB can only be 2-d
    with self.assertRaises(ValueError):
        distance.cdist(np.random.rand(3, 3), np.random.rand(3, 3, 3))

    # XA cannot be sparse
    sparse_a = tensor(sps.csr_matrix(np.zeros((4, 3))))
    with self.assertRaises(ValueError):
        distance.cdist(sparse_a, np.random.rand(10, 3))

    # XB cannot be sparse
    sparse_b = tensor(sps.csr_matrix(np.zeros((4, 3))))
    with self.assertRaises(ValueError):
        distance.cdist(np.random.rand(10, 3), sparse_b)

    # out has the wrong type
    with self.assertRaises(TypeError):
        distance.cdist(raw_a, raw_b, out=2)

    # out has the wrong shape
    with self.assertRaises(ValueError):
        distance.cdist(raw_a, raw_b, out=tensor(np.random.rand(100, 91)))

    # out has the wrong dtype
    with self.assertRaises(ValueError):
        distance.cdist(raw_a, raw_b,
                       out=tensor(np.random.randint(2, size=(100, 90))))

    # unknown extra keyword argument
    with self.assertRaises(TypeError):
        distance.cdist(raw_a, raw_b, unknown_kw='unknown_kw')