def testEuclideanDistancesExecution(self):
    """Compare tensor ``euclidean_distances`` with ``sk_euclidean_distances``.

    A dense and a sparse input pair (30x10 and 40x10, chunked by 9 and 7)
    are each pushed through four scenarios: plain ``(x, y)`` distances,
    caller-supplied ``X_norm_squared`` / ``Y_norm_squared``, float32 inputs
    with ``squared=True``, and the single-argument form.
    """

    def run(tensor):
        # Execute on the test executor and take the first concatenated result.
        return self.executor.execute_tensor(tensor, concat=True)[0]

    dense_pair = (np.random.rand(30, 10), np.random.rand(40, 10))
    sparse_pair = (
        SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
        SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
    )

    for raw_x, raw_y in (dense_pair, sparse_pair):
        x = mt.tensor(raw_x, chunk_size=9)
        y = mt.tensor(raw_y, chunk_size=7)

        # Scenario 1: plain pairwise distances between x and y.
        actual = run(euclidean_distances(x, y))
        wanted = sk_euclidean_distances(raw_x, Y=raw_y)
        np.testing.assert_almost_equal(actual, wanted)

        # Scenario 2: caller-supplied norms. The values are plain row sums
        # (not sums of squares); the same values are handed to both
        # implementations, so only their agreement is being checked.
        actual = run(
            euclidean_distances(
                x,
                y,
                X_norm_squared=x.sum(axis=1)[..., np.newaxis],
                Y_norm_squared=y.sum(axis=1)[np.newaxis, ...],
            )
        )
        wanted = sk_euclidean_distances(
            raw_x,
            raw_y,
            X_norm_squared=raw_x.sum(axis=1)[..., np.newaxis],
            Y_norm_squared=raw_y.sum(axis=1)[np.newaxis, ...],
        )
        np.testing.assert_almost_equal(actual, wanted)

        # Scenario 3: float32 inputs with squared=True, compared to six
        # decimal places.
        tensor_x32 = (x**2).astype(np.float32)
        tensor_y32 = (y**2).astype(np.float32)
        actual = run(euclidean_distances(tensor_x32, tensor_y32, squared=True))
        raw_x32 = (raw_x**2).astype(np.float32)
        raw_y32 = (raw_y**2).astype(np.float32)
        wanted = sk_euclidean_distances(raw_x32, raw_y32, squared=True)
        np.testing.assert_almost_equal(actual, wanted, decimal=6)

        # Scenario 4: x is y (only one argument is given).
        actual = run(euclidean_distances(x))
        wanted = sk_euclidean_distances(raw_x)
        np.testing.assert_almost_equal(actual, wanted)
def test_euclidean_distances_execution(setup):
    """Compare tensor ``euclidean_distances`` with ``sk_euclidean_distances``.

    For a dense and a sparse input pair the test covers plain ``(x, y)``
    distances, caller-supplied ``X_norm_squared`` / ``Y_norm_squared``,
    float32 inputs with ``squared=True``, and the single-argument form.
    A final section repeats the plain comparison on small dense inputs
    under two different ``chunk_store_limit`` option values.
    """

    def fetch(tensor):
        # Execute the tensor and pull the result back for comparison.
        return tensor.execute().fetch()

    dense_inputs = (np.random.rand(30, 10), np.random.rand(40, 10))
    sparse_inputs = (
        SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
        SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
    )

    for raw_x, raw_y in (dense_inputs, sparse_inputs):
        x = mt.tensor(raw_x, chunk_size=9)
        y = mt.tensor(raw_y, chunk_size=7)

        # Plain pairwise distances between x and y.
        got = fetch(euclidean_distances(x, y))
        ref = sk_euclidean_distances(raw_x, Y=raw_y)
        np.testing.assert_almost_equal(got, ref)

        # Caller-supplied norms. The values are plain row sums (not sums of
        # squares); both implementations receive the same values, so only
        # their agreement is being checked.
        got = fetch(
            euclidean_distances(
                x,
                y,
                X_norm_squared=x.sum(axis=1)[..., np.newaxis],
                Y_norm_squared=y.sum(axis=1)[np.newaxis, ...],
            )
        )
        ref = sk_euclidean_distances(
            raw_x,
            raw_y,
            X_norm_squared=raw_x.sum(axis=1)[..., np.newaxis],
            Y_norm_squared=raw_y.sum(axis=1)[np.newaxis, ...],
        )
        np.testing.assert_almost_equal(got, ref)

        # float32 inputs with squared=True, compared to six decimal places.
        tensor_x32 = (x**2).astype(np.float32)
        tensor_y32 = (y**2).astype(np.float32)
        got = fetch(euclidean_distances(tensor_x32, tensor_y32, squared=True))
        raw_x32 = (raw_x**2).astype(np.float32)
        raw_y32 = (raw_y**2).astype(np.float32)
        ref = sk_euclidean_distances(raw_x32, raw_y32, squared=True)
        np.testing.assert_almost_equal(got, ref, decimal=6)

        # test x is y
        got = fetch(euclidean_distances(x))
        ref = sk_euclidean_distances(raw_x)
        np.testing.assert_almost_equal(got, ref)

    # test size adjust
    raw1 = np.random.rand(12, 4)
    raw2 = np.random.rand(18, 4)
    t1 = mt.tensor(raw1, chunk_size=4)
    t2 = mt.tensor(raw2, chunk_size=6)

    with option_context({'chunk_store_limit': 80}):
        # Both argument orders are checked under this limit.
        for lhs, rhs, raw_lhs, raw_rhs in (
            (t1, t2, raw1, raw2),
            (t2, t1, raw2, raw1),
        ):
            got = fetch(euclidean_distances(lhs, rhs))
            ref = sk_euclidean_distances(raw_lhs, raw_rhs)
            np.testing.assert_almost_equal(got, ref)

    with option_context({'chunk_store_limit': 20}):
        got = fetch(euclidean_distances(t1, t2))
        ref = sk_euclidean_distances(raw1, raw2)
        np.testing.assert_almost_equal(got, ref)