Example #1
0
    def testEuclideanDistancesExecution(self):
        """Compare ``euclidean_distances`` with ``sk_euclidean_distances``.

        The same four checks run on a dense pair and on a sparse pair of
        random inputs: the plain two-argument call, the call with
        caller-supplied ``X_norm_squared`` / ``Y_norm_squared``, float32
        inputs with ``squared=True``, and the single-argument call.
        """
        def run(tensor):
            # Execute through the test executor and take the first result.
            return self.executor.execute_tensor(tensor, concat=True)[0]

        dense_pair = (np.random.rand(30, 10), np.random.rand(40, 10))
        sparse_pair = (
            SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
            SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
        )

        for left_raw, right_raw in (dense_pair, sparse_pair):
            left = mt.tensor(left_raw, chunk_size=9)
            right = mt.tensor(right_raw, chunk_size=7)

            # Plain two-argument call.
            got = run(euclidean_distances(left, right))
            want = sk_euclidean_distances(left_raw, Y=right_raw)
            np.testing.assert_almost_equal(got, want)

            # Row sums are handed to both implementations as the
            # X_norm_squared / Y_norm_squared arguments.
            left_norm = left.sum(axis=1)[..., np.newaxis]
            right_norm = right.sum(axis=1)[np.newaxis, ...]
            with_norms = euclidean_distances(
                left, right,
                X_norm_squared=left_norm, Y_norm_squared=right_norm)
            left_raw_norm = left_raw.sum(axis=1)[..., np.newaxis]
            right_raw_norm = right_raw.sum(axis=1)[np.newaxis, ...]

            got = run(with_norms)
            want = sk_euclidean_distances(
                left_raw, right_raw,
                X_norm_squared=left_raw_norm, Y_norm_squared=right_raw_norm)
            np.testing.assert_almost_equal(got, want)

            # float32 inputs with squared=True; compared to 6 decimals.
            left_sq = (left ** 2).astype(np.float32)
            right_sq = (right ** 2).astype(np.float32)
            squared_dist = euclidean_distances(left_sq, right_sq, squared=True)
            left_raw_sq = (left_raw ** 2).astype(np.float32)
            right_raw_sq = (right_raw ** 2).astype(np.float32)

            got = run(squared_dist)
            want = sk_euclidean_distances(
                left_raw_sq, right_raw_sq, squared=True)
            np.testing.assert_almost_equal(got, want, decimal=6)

            # Single-argument call (x is y).
            got = run(euclidean_distances(left))
            want = sk_euclidean_distances(left_raw)
            np.testing.assert_almost_equal(got, want)
Example #2
0
def test_euclidean_distances_execution(setup):
    """Compare ``euclidean_distances`` with ``sk_euclidean_distances``.

    First runs four checks on a dense pair and a sparse pair of random
    inputs (plain call, caller-supplied norm arguments, float32 inputs with
    ``squared=True``, single-argument call), then repeats the plain call
    under two ``chunk_store_limit`` option values.
    """
    def fetched(tensor):
        # Execute the tensor and pull the result back.
        return tensor.execute().fetch()

    dense_pair = (np.random.rand(30, 10), np.random.rand(40, 10))
    sparse_pair = (
        SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
        SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
    )

    for left_raw, right_raw in (dense_pair, sparse_pair):
        left = mt.tensor(left_raw, chunk_size=9)
        right = mt.tensor(right_raw, chunk_size=7)

        # Plain two-argument call.
        got = fetched(euclidean_distances(left, right))
        want = sk_euclidean_distances(left_raw, Y=right_raw)
        np.testing.assert_almost_equal(got, want)

        # Row sums are handed to both implementations as the
        # X_norm_squared / Y_norm_squared arguments.
        left_norm = left.sum(axis=1)[..., np.newaxis]
        right_norm = right.sum(axis=1)[np.newaxis, ...]
        with_norms = euclidean_distances(
            left, right,
            X_norm_squared=left_norm, Y_norm_squared=right_norm)
        left_raw_norm = left_raw.sum(axis=1)[..., np.newaxis]
        right_raw_norm = right_raw.sum(axis=1)[np.newaxis, ...]

        got = fetched(with_norms)
        want = sk_euclidean_distances(
            left_raw, right_raw,
            X_norm_squared=left_raw_norm, Y_norm_squared=right_raw_norm)
        np.testing.assert_almost_equal(got, want)

        # float32 inputs with squared=True; compared to 6 decimals.
        left_sq = (left ** 2).astype(np.float32)
        right_sq = (right ** 2).astype(np.float32)
        squared_dist = euclidean_distances(left_sq, right_sq, squared=True)
        left_raw_sq = (left_raw ** 2).astype(np.float32)
        right_raw_sq = (right_raw ** 2).astype(np.float32)

        got = fetched(squared_dist)
        want = sk_euclidean_distances(left_raw_sq, right_raw_sq, squared=True)
        np.testing.assert_almost_equal(got, want, decimal=6)

        # Single-argument call (x is y).
        got = fetched(euclidean_distances(left))
        want = sk_euclidean_distances(left_raw)
        np.testing.assert_almost_equal(got, want)

    # test size adjust
    raw1 = np.random.rand(12, 4)
    raw2 = np.random.rand(18, 4)

    t1 = mt.tensor(raw1, chunk_size=4)
    t2 = mt.tensor(raw2, chunk_size=6)

    with option_context({'chunk_store_limit': 80}):
        # Both argument orders are checked under this limit.
        orderings = (
            (t1, t2, raw1, raw2),
            (t2, t1, raw2, raw1),
        )
        for first, second, first_raw, second_raw in orderings:
            got = fetched(euclidean_distances(first, second))
            want = sk_euclidean_distances(first_raw, second_raw)
            np.testing.assert_almost_equal(got, want)

    with option_context({'chunk_store_limit': 20}):
        got = fetched(euclidean_distances(t1, t2))
        want = sk_euclidean_distances(raw1, raw2)
        np.testing.assert_almost_equal(got, want)