def testPdistExecution(self):
    """Run ``distance.pdist`` through the executor and compare with SciPy.

    Covers an input held in a single chunk, an input split into several
    chunks (with and without ``aggregate_size``), string and callable
    metrics, and the tensor-valued extra arguments ``w``, ``V`` and ``VI``.
    Every executed result is compared with ``scipy.spatial.distance.pdist``
    applied to the same raw array.
    """
    from scipy.spatial.distance import pdist as sp_pdist

    def run(t):
        # Execute the tensor and hand back the concatenated result.
        return self._executor.execute_tensor(t, concat=True)[0]

    def check_chunk_count(t, n_chunks):
        # Tile the tensor and verify how many chunks the tiled result holds.
        tiled = t.tiles()
        self.assertEqual(len(tiled.chunks), n_chunks)

    # Callable metric shared by the single- and multi-chunk cases.
    custom_metric = lambda u, v: np.sqrt(((u - v)**2).sum())

    data = np.random.rand(100, 10)

    # ---- input held in one chunk ----
    single = tensor(data, chunk_size=100)

    np.testing.assert_array_equal(
        run(distance.pdist(single)),
        sp_pdist(data))

    np.testing.assert_array_equal(
        run(distance.pdist(single, metric='hamming')),
        sp_pdist(data, metric='hamming'))

    np.testing.assert_array_equal(
        run(distance.pdist(single, metric=custom_metric)),
        sp_pdist(data, metric=custom_metric))

    # ---- input split into several chunks ----
    multi = tensor(data, chunk_size=12)

    d = distance.pdist(multi)
    check_chunk_count(d, 1)
    np.testing.assert_array_equal(run(d), sp_pdist(data))

    d = distance.pdist(multi, aggregate_size=3)
    check_chunk_count(d, 3)
    np.testing.assert_array_equal(run(d), sp_pdist(data))

    d = distance.pdist(multi, metric='hamming', aggregate_size=2)
    check_chunk_count(d, 2)
    np.testing.assert_array_equal(run(d), sp_pdist(data, metric='hamming'))

    d = distance.pdist(multi, metric=custom_metric, aggregate_size=2)
    np.testing.assert_array_equal(
        run(d), sp_pdist(data, metric=custom_metric))

    # ---- extra tensor arguments, for default and explicit chunking ----
    for inp in (tensor(data), tensor(data, chunk_size=12)):
        # weight vector ``w``
        w_raw = np.random.rand(10)
        w_tensor = tensor(w_raw, chunk_size=7)
        d = distance.pdist(inp, metric='wminkowski', p=3, w=w_tensor)
        np.testing.assert_array_equal(
            run(d), sp_pdist(data, metric='wminkowski', p=3, w=w_raw))

        # variance vector ``V``
        v_raw = np.random.rand(10)
        v_tensor = tensor(v_raw, chunk_size=7)
        d = distance.pdist(inp, metric='seuclidean', V=v_tensor)
        np.testing.assert_array_equal(
            run(d), sp_pdist(data, metric='seuclidean', V=v_raw))

        # matrix ``VI``
        vi_raw = np.random.rand(10, 10)
        vi_tensor = tensor(vi_raw, chunk_size=8)
        d = distance.pdist(inp, metric='mahalanobis', VI=vi_tensor)
        np.testing.assert_array_equal(
            run(d), sp_pdist(data, metric='mahalanobis', VI=vi_raw))
def test_pdist():
    """Check output shape, chunk layout and argument validation of ``pdist``.

    The first half verifies the shape of the result and of its chunks after
    tiling, for a single-chunk input and for a multi-chunk input aggregated
    into two chunks.  The second half verifies the exception type raised for
    each kind of invalid argument.
    """
    n_samples = 100
    data = np.random.rand(n_samples, 10)
    n_pairs = n_samples * (n_samples - 1) // 2

    # input held in one chunk -> one output chunk holding every distance
    result = distance.pdist(tensor(data, chunk_size=100))
    assert result.shape == (n_pairs,)
    result = tile(result)
    assert len(result.chunks) == 1
    assert all(chunk.shape == (result.shape[0],) for chunk in result.chunks)

    # multi-chunk input aggregated into two equally sized output chunks
    result = distance.pdist(tensor(data, chunk_size=15), aggregate_size=2)
    assert result.shape == (n_pairs,)
    result = tile(result)
    assert len(result.chunks) == 2
    assert all(chunk.shape == (result.shape[0] // 2,)
               for chunk in result.chunks)

    sparse_x = tensor(sps.csr_matrix(np.zeros((4, 3))))

    # Each call is wrapped in a lambda so that its arguments are only built
    # inside the ``pytest.raises`` context.
    invalid_calls = [
        # X cannot be sparse
        (ValueError, lambda: distance.pdist(sparse_x)),
        # X can only be 2-d
        (ValueError, lambda: distance.pdist(np.random.rand(3, 3, 3))),
        # out type wrong
        (TypeError, lambda: distance.pdist(np.random.rand(3, 3), out=2)),
        # out shape wrong
        (ValueError, lambda: distance.pdist(
            np.random.rand(3, 3), out=tensor(np.random.rand(2)))),
        # out dtype wrong
        (ValueError, lambda: distance.pdist(
            np.random.rand(3, 3),
            out=tensor(np.random.randint(2, size=(3,))))),
        # unknown keyword argument
        (TypeError, lambda: distance.pdist(
            np.random.rand(3, 3), unknown_kw='unknown_kw')),
    ]
    for expected_error, call in invalid_calls:
        with pytest.raises(expected_error):
            call()
def testPdist(self):
    """Check output shape, chunk layout and argument validation of ``pdist``.

    Verifies the shape of the result and of its chunks after tiling, for a
    single-chunk input and for a multi-chunk input aggregated into two
    chunks, then verifies the exception type raised for each kind of
    invalid argument.
    """
    n_samples = 100
    data = np.random.rand(n_samples, 10)
    n_pairs = n_samples * (n_samples - 1) // 2

    # input held in one chunk -> one output chunk holding every distance
    single = tensor(data, chunk_size=100)
    result = distance.pdist(single)
    self.assertEqual(result.shape, (n_pairs,))
    result = result.tiles()
    self.assertEqual(len(result.chunks), 1)
    for chunk in result.chunks:
        self.assertEqual(chunk.shape, (result.shape[0],))

    # multi-chunk input aggregated into two equally sized output chunks
    multi = tensor(data, chunk_size=15)
    result = distance.pdist(multi, aggregate_size=2)
    self.assertEqual(result.shape, (n_pairs,))
    result = result.tiles()
    self.assertEqual(len(result.chunks), 2)
    for chunk in result.chunks:
        self.assertEqual(chunk.shape, (result.shape[0] // 2,))

    # X cannot be sparse
    sparse_x = tensor(sps.csr_matrix(np.zeros((4, 3))))
    with self.assertRaises(ValueError):
        distance.pdist(sparse_x)

    # X can only be 2-d
    with self.assertRaises(ValueError):
        cube = np.random.rand(3, 3, 3)
        distance.pdist(cube)

    # out type wrong
    with self.assertRaises(TypeError):
        square = np.random.rand(3, 3)
        distance.pdist(square, out=2)

    # out shape wrong
    with self.assertRaises(ValueError):
        square = np.random.rand(3, 3)
        bad_shape_out = tensor(np.random.rand(2))
        distance.pdist(square, out=bad_shape_out)

    # out dtype wrong
    with self.assertRaises(ValueError):
        square = np.random.rand(3, 3)
        bad_dtype_out = tensor(np.random.randint(2, size=(3,)))
        distance.pdist(square, out=bad_dtype_out)

    # unknown keyword argument
    with self.assertRaises(TypeError):
        square = np.random.rand(3, 3)
        distance.pdist(square, unknown_kw='unknown_kw')