Esempio n. 1
0
    def testUnique(self):
        """Check the shape/dtype metadata of ``unique`` before and after tiling.

        Scenarios covered, in order:

        * a scalar integer input;
        * a scalar float input with ``return_index=True``;
        * an out-of-range ``axis`` on a scalar, which must raise
          ``np.AxisError``;
        * a chunked 1-d tensor with ``aggregate_size=2``;
        * a chunked 2-d tensor along ``axis=1`` with every optional output
          (index, inverse, counts) requested.

        No executor is invoked here; only tensor- and chunk-level metadata
        are inspected.  Unknown extents are expected to be reported as NaN.
        """
        # --- scalar integer input -------------------------------------
        x = unique(np.int64(1))

        self.assertEqual(len(x.shape), 1)
        self.assertTrue(np.isnan(x.shape[0]))
        self.assertEqual(x.dtype, np.dtype(np.int64))

        x = x.tiles()

        self.assertEqual(len(x.chunks), 1)
        self.assertEqual(len(x.chunks[0].shape), 1)
        self.assertTrue(np.isnan(x.chunks[0].shape[0]))
        self.assertEqual(x.chunks[0].dtype, np.dtype(np.int64))

        # --- scalar float input, also asking for first-occurrence indices
        x, indices = unique(0.1, return_index=True)

        self.assertEqual(len(x.shape), 1)
        self.assertTrue(np.isnan(x.shape[0]))
        self.assertEqual(x.dtype, np.dtype(np.float64))
        self.assertEqual(len(indices.shape), 1)
        self.assertTrue(np.isnan(indices.shape[0]))
        self.assertEqual(indices.dtype, np.dtype(np.intp))

        x = x.tiles()
        # NOTE(review): get_tiled is defined outside this snippet; it is
        # presumably the lookup for the tiled form of a sibling output.
        indices = get_tiled(indices)

        self.assertEqual(len(x.chunks), 1)
        self.assertEqual(len(x.chunks[0].shape), 1)
        self.assertTrue(np.isnan(x.chunks[0].shape[0]))
        self.assertEqual(x.chunks[0].dtype, np.dtype(np.float64))
        self.assertEqual(len(indices.chunks), 1)
        self.assertEqual(len(indices.chunks[0].shape), 1)
        self.assertTrue(np.isnan(indices.chunks[0].shape[0]))
        self.assertEqual(indices.chunks[0].dtype, np.dtype(np.intp))

        # --- a scalar has no axis 1 -----------------------------------
        with self.assertRaises(np.AxisError):
            unique(0.1, axis=1)

        # --- chunked 1-d tensor ---------------------------------------
        raw = np.random.randint(10, size=(10), dtype=np.int64)
        a = tensor(raw, chunk_size=4)

        x = unique(a, aggregate_size=2)

        self.assertEqual(len(x.shape), len(raw.shape))
        self.assertTrue(np.isnan(x.shape[0]))
        self.assertEqual(x.dtype, np.dtype(np.int64))

        x = x.tiles()

        # aggregate_size=2 is expected to yield two output chunks.
        self.assertEqual(len(x.chunks), 2)
        self.assertEqual(x.nsplits, ((np.nan, np.nan), ))
        for i in range(2):
            self.assertEqual(x.chunks[i].shape, (np.nan, ))
            self.assertEqual(x.chunks[i].dtype, raw.dtype)

        # --- chunked 2-d tensor, unique columns, all optional outputs --
        raw = np.random.randint(10, size=(10, 20), dtype=np.int64)
        a = tensor(raw, chunk_size=(4, 6))

        x, indices, inverse, counts = \
            unique(a, axis=1, aggregate_size=2, return_index=True,
                   return_inverse=True, return_counts=True)

        self.assertEqual(x.shape, (10, np.nan))
        self.assertEqual(x.dtype, np.dtype(np.int64))
        self.assertEqual(indices.shape, (np.nan, ))
        self.assertEqual(indices.dtype, np.dtype(np.intp))
        self.assertEqual(inverse.shape, (20, ))
        self.assertEqual(inverse.dtype, np.dtype(np.intp))
        self.assertEqual(counts.shape, (np.nan, ))
        self.assertEqual(counts.dtype, np.dtype(np.int_))

        x = x.tiles()
        indices, inverse, counts = \
            get_tiled(indices), get_tiled(inverse), get_tiled(counts)

        self.assertEqual(len(x.chunks), 2)
        self.assertEqual(x.nsplits, ((10, ), (np.nan, np.nan)))
        for i in range(2):
            self.assertEqual(x.chunks[i].shape, (10, np.nan))
            self.assertEqual(x.chunks[i].dtype, raw.dtype)
            self.assertEqual(x.chunks[i].index, (0, i))

        self.assertEqual(len(indices.chunks), 2)
        self.assertEqual(indices.nsplits, ((np.nan, np.nan), ))
        for i in range(2):
            self.assertEqual(indices.chunks[i].shape, (np.nan, ))
            # Must agree with the tensor-level ``indices.dtype`` check
            # (np.intp).  Comparing against raw.dtype (int64) would only
            # hold where intp happens to be 64 bits wide.
            self.assertEqual(indices.chunks[i].dtype, np.dtype(np.intp))
            self.assertEqual(indices.chunks[i].index, (i, ))

        # inverse has one entry per input column, so it keeps the input's
        # column chunking: 20 columns in chunks of 6 -> 6, 6, 6, 2.
        self.assertEqual(len(inverse.chunks), 4)
        self.assertEqual(inverse.nsplits, ((6, 6, 6, 2), ))
        for i in range(4):
            self.assertEqual(inverse.chunks[i].shape, ((6, 6, 6, 2)[i], ))
            # Same platform-independence fix as for ``indices`` above.
            self.assertEqual(inverse.chunks[i].dtype, np.dtype(np.intp))
            self.assertEqual(inverse.chunks[i].index, (i, ))

        self.assertEqual(len(counts.chunks), 2)
        self.assertEqual(counts.nsplits, ((np.nan, np.nan), ))
        for i in range(2):
            self.assertEqual(counts.chunks[i].shape, (np.nan, ))
            self.assertEqual(counts.chunks[i].dtype, np.dtype(np.int_))
            self.assertEqual(counts.chunks[i].index, (i, ))
Esempio n. 2
0
def test_unique():
    """Check the shape/dtype metadata of ``unique`` before and after tiling.

    Scenarios covered, in order:

    * a scalar integer input;
    * a scalar float input with ``return_index=True``;
    * an out-of-range ``axis`` on a scalar, which must raise
      ``np.AxisError``;
    * a chunked 1-d tensor with ``aggregate_size=2``;
    * a chunked 2-d tensor along ``axis=1`` with every optional output
      (index, inverse, counts) requested.

    No execution happens here; only tensor- and chunk-level metadata are
    inspected.  Unknown extents are expected to be reported as NaN.
    """
    # --- scalar integer input -----------------------------------------
    x = unique(np.int64(1))

    assert len(x.shape) == 1
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.int64)

    x = tile(x)

    assert len(x.chunks) == 1
    assert len(x.chunks[0].shape) == 1
    assert np.isnan(x.chunks[0].shape[0])
    assert x.chunks[0].dtype == np.dtype(np.int64)

    # --- scalar float input, also asking for first-occurrence indices --
    x, indices = unique(0.1, return_index=True)

    assert len(x.shape) == 1
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.float64)
    assert len(indices.shape) == 1
    assert np.isnan(indices.shape[0])
    assert indices.dtype == np.dtype(np.intp)

    x, indices = tile(x, indices)

    assert len(x.chunks) == 1
    assert len(x.chunks[0].shape) == 1
    assert np.isnan(x.chunks[0].shape[0])
    assert x.chunks[0].dtype == np.dtype(np.float64)
    assert len(indices.chunks) == 1
    assert len(indices.chunks[0].shape) == 1
    assert np.isnan(indices.chunks[0].shape[0])
    assert indices.chunks[0].dtype == np.dtype(np.intp)

    # --- a scalar has no axis 1 ---------------------------------------
    with pytest.raises(np.AxisError):
        unique(0.1, axis=1)

    # --- chunked 1-d tensor -------------------------------------------
    raw = np.random.randint(10, size=(10), dtype=np.int64)
    a = tensor(raw, chunk_size=4)

    x = unique(a, aggregate_size=2)

    assert len(x.shape) == len(raw.shape)
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.int64)

    x = tile(x)

    # aggregate_size=2 is expected to yield two output chunks.
    assert len(x.chunks) == 2
    assert x.nsplits == ((np.nan, np.nan), )
    for i in range(2):
        assert x.chunks[i].shape == (np.nan, )
        assert x.chunks[i].dtype == raw.dtype

    # --- chunked 2-d tensor, unique columns, all optional outputs ------
    raw = np.random.randint(10, size=(10, 20), dtype=np.int64)
    a = tensor(raw, chunk_size=(4, 6))

    x, indices, inverse, counts = \
        unique(a, axis=1, aggregate_size=2, return_index=True,
               return_inverse=True, return_counts=True)

    assert x.shape == (10, np.nan)
    assert x.dtype == np.dtype(np.int64)
    assert indices.shape == (np.nan, )
    assert indices.dtype == np.dtype(np.intp)
    assert inverse.shape == (20, )
    assert inverse.dtype == np.dtype(np.intp)
    assert counts.shape == (np.nan, )
    assert counts.dtype == np.dtype(np.int_)

    x, indices, inverse, counts = tile(x, indices, inverse, counts)

    assert len(x.chunks) == 2
    assert x.nsplits == ((10, ), (np.nan, np.nan))
    for i in range(2):
        assert x.chunks[i].shape == (10, np.nan)
        assert x.chunks[i].dtype == raw.dtype
        assert x.chunks[i].index == (0, i)

    assert len(indices.chunks) == 2
    assert indices.nsplits == ((np.nan, np.nan), )
    for i in range(2):
        assert indices.chunks[i].shape == (np.nan, )
        # Must agree with the tensor-level ``indices.dtype`` check
        # (np.intp).  Comparing against raw.dtype (int64) would only hold
        # where intp happens to be 64 bits wide.
        assert indices.chunks[i].dtype == np.dtype(np.intp)
        assert indices.chunks[i].index == (i, )

    # inverse has one entry per input column, so it keeps the input's
    # column chunking: 20 columns in chunks of 6 -> 6, 6, 6, 2.
    assert len(inverse.chunks) == 4
    assert inverse.nsplits == ((6, 6, 6, 2), )
    for i in range(4):
        assert inverse.chunks[i].shape == ((6, 6, 6, 2)[i], )
        # Same platform-independence fix as for ``indices`` above.
        assert inverse.chunks[i].dtype == np.dtype(np.intp)
        assert inverse.chunks[i].index == (i, )

    assert len(counts.chunks) == 2
    assert counts.nsplits == ((np.nan, np.nan), )
    for i in range(2):
        assert counts.chunks[i].shape == (np.nan, )
        assert counts.chunks[i].dtype == np.dtype(np.int_)
        assert counts.chunks[i].index == (i, )
Esempio n. 3
0
    def testUniqueExecution(self):
        """Execute ``unique`` and compare every output with ``np.unique``.

        For each of two chunk sizes the 1-d input is checked with no optional
        outputs and with several combinations of ``return_index``,
        ``return_inverse`` and ``return_counts``; a 3-d input is then checked
        flattened and along axes 1 and 2.  Afterwards a 2-d input with a
        forced duplicate column is checked along ``axis=1`` with
        ``aggregate_size=3``, and finally a 0/1 tensor built from a chunked
        random tensor.
        """
        rng = np.random.RandomState(0)
        raw = rng.randint(10, size=(10, ))

        # Optional-output keyword combinations, in the order they are checked.
        optional_outputs = (
            dict(return_index=True),
            dict(return_inverse=True),
            dict(return_counts=True),
            dict(return_index=True, return_inverse=True, return_counts=True),
            dict(return_index=True, return_counts=True),
        )
        # Keyword sets for the 3-d case: flattened, then along axis 1 and 2.
        axis_variants = ({}, {'axis': 1}, {'axis': 2})

        for chunk_size in (10, 3):
            x = tensor(raw, chunk_size=chunk_size)

            # Plain call: single output fetched as one concatenated array.
            got = self.executor.execute_tensor(unique(x), concat=True)[0]
            np.testing.assert_array_equal(got, np.unique(raw))

            for flags in optional_outputs:
                # One array of unique values plus one per requested extra.
                n_outputs = 1 + len(flags)

                got = self.executor.execute_tensors(list(unique(x, **flags)))
                want = np.unique(raw, **flags)

                self.assertEqual(len(got), n_outputs)
                self.assertEqual(len(want), n_outputs)
                for actual, reference in zip(got, want):
                    np.testing.assert_array_equal(actual, reference)

            raw3d = rng.randint(10, size=(4, 5, 6))
            x3d = tensor(raw3d, chunk_size=chunk_size)

            for axis_kw in axis_variants:
                got = self.executor.execute_tensor(
                    unique(x3d, **axis_kw), concat=True)[0]
                np.testing.assert_array_equal(got, np.unique(raw3d, **axis_kw))

        raw = rng.randint(10, size=(10, 20))
        # Make columns 0 and 11 identical so at least one duplicate exists.
        raw[:, 0] = raw[:, 11] = rng.randint(10, size=(10, ))
        x = tensor(raw, chunk_size=2)
        outputs = unique(x, aggregate_size=3, axis=1, return_index=True,
                         return_inverse=True, return_counts=True)
        y, ind, inv, counts = outputs

        got_unique, got_ind, got_inv, got_counts = \
            self.executor.execute_tensors((y, ind, inv, counts))
        want_unique, want_ind, want_counts = np.unique(
            raw, axis=1, return_index=True, return_counts=True)

        # Reorder the executed columns by sorting on every row in turn, then
        # apply the same permutation to the per-column outputs before
        # comparing them with np.unique's results.
        row_keys = list(range(got_unique.shape[0]))
        column_order = np.asarray(
            pd.DataFrame(got_unique).sort_values(row_keys, axis=1).columns)
        sorted_unique = got_unique[:, column_order]
        sorted_ind = got_ind[column_order]
        sorted_counts = got_counts[column_order]

        np.testing.assert_array_equal(sorted_unique, want_unique)
        np.testing.assert_array_equal(sorted_ind, want_ind)
        # The inverse refers to the unsorted result, so rebuild from that.
        np.testing.assert_array_equal(got_unique[:, got_inv], raw)
        np.testing.assert_array_equal(sorted_counts, want_counts)

        random_floats = mt.random.RandomState(0).rand(1000, chunk_size=20)
        x = (random_floats > 0.5).astype(np.int32)
        y = unique(x)
        got = self.executor.execute_tensor(y, concat=True)[0]
        np.testing.assert_array_equal(np.sort(got), np.array([0, 1]))