def testUnique(self):
    """Check the shape, dtype and chunk metadata reported by ``unique``.

    Scenarios covered, in order:

    * a NumPy scalar input;
    * a Python float input with ``return_index=True``;
    * an ``axis`` that does not exist on a scalar (expects ``np.AxisError``);
    * a chunked 1-D tensor with ``aggregate_size=2``;
    * a chunked 2-D tensor along ``axis=1`` with every optional output.

    Only metadata (``shape``, ``dtype``, ``chunks``, ``nsplits``, ``index``)
    is inspected, before and after tiling.
    """
    # --- NumPy scalar input -------------------------------------------
    x = unique(np.int64(1))

    self.assertEqual(len(x.shape), 1)
    self.assertTrue(np.isnan(x.shape[0]))
    self.assertEqual(x.dtype, np.dtype(np.int64))

    x = x.tiles()

    self.assertEqual(len(x.chunks), 1)
    self.assertEqual(len(x.chunks[0].shape), 1)
    self.assertTrue(np.isnan(x.chunks[0].shape[0]))
    self.assertEqual(x.chunks[0].dtype, np.dtype(np.int64))

    # --- Python float input, also requesting the indices ---------------
    x, indices = unique(0.1, return_index=True)

    self.assertEqual(len(x.shape), 1)
    self.assertTrue(np.isnan(x.shape[0]))
    self.assertEqual(x.dtype, np.dtype(np.float64))
    self.assertEqual(len(indices.shape), 1)
    self.assertTrue(np.isnan(indices.shape[0]))
    self.assertEqual(indices.dtype, np.dtype(np.intp))

    x = x.tiles()
    indices = get_tiled(indices)

    self.assertEqual(len(x.chunks), 1)
    self.assertEqual(len(x.chunks[0].shape), 1)
    self.assertTrue(np.isnan(x.chunks[0].shape[0]))
    self.assertEqual(x.chunks[0].dtype, np.dtype(np.float64))
    self.assertEqual(len(indices.chunks), 1)
    self.assertEqual(len(indices.chunks[0].shape), 1)
    self.assertTrue(np.isnan(indices.chunks[0].shape[0]))
    self.assertEqual(indices.chunks[0].dtype, np.dtype(np.intp))

    # --- axis=1 does not exist for a scalar input ----------------------
    with self.assertRaises(np.AxisError):
        unique(0.1, axis=1)

    # --- chunked 1-D tensor --------------------------------------------
    raw = np.random.randint(10, size=(10), dtype=np.int64)
    a = tensor(raw, chunk_size=4)

    x = unique(a, aggregate_size=2)

    self.assertEqual(len(x.shape), len(raw.shape))
    self.assertTrue(np.isnan(x.shape[0]))
    self.assertEqual(x.dtype, np.dtype(np.int64))

    x = x.tiles()

    self.assertEqual(len(x.chunks), 2)
    # nan != nan, so these tuple comparisons can only succeed through the
    # identity shortcut of container equality (same ``np.nan`` object).
    self.assertEqual(x.nsplits, ((np.nan, np.nan),))
    for i in range(2):
        self.assertEqual(x.chunks[i].shape, (np.nan,))
        self.assertEqual(x.chunks[i].dtype, raw.dtype)

    # --- chunked 2-D tensor along axis=1, all optional outputs ---------
    raw = np.random.randint(10, size=(10, 20), dtype=np.int64)
    a = tensor(raw, chunk_size=(4, 6))

    x, indices, inverse, counts = unique(
        a, axis=1, aggregate_size=2,
        return_index=True, return_inverse=True, return_counts=True)

    self.assertEqual(x.shape, (10, np.nan))
    self.assertEqual(x.dtype, np.dtype(np.int64))
    self.assertEqual(indices.shape, (np.nan,))
    self.assertEqual(indices.dtype, np.dtype(np.intp))
    self.assertEqual(inverse.shape, (20,))
    self.assertEqual(inverse.dtype, np.dtype(np.intp))
    self.assertEqual(counts.shape, (np.nan,))
    self.assertEqual(counts.dtype, np.dtype(np.int_))

    x = x.tiles()
    indices, inverse, counts = \
        get_tiled(indices), get_tiled(inverse), get_tiled(counts)

    self.assertEqual(len(x.chunks), 2)
    self.assertEqual(x.nsplits, ((10,), (np.nan, np.nan)))
    for i in range(2):
        self.assertEqual(x.chunks[i].shape, (10, np.nan))
        self.assertEqual(x.chunks[i].dtype, raw.dtype)
        self.assertEqual(x.chunks[i].index, (0, i))

    self.assertEqual(len(indices.chunks), 2)
    self.assertEqual(indices.nsplits, ((np.nan, np.nan),))
    for i in range(2):
        self.assertEqual(indices.chunks[i].shape, (np.nan,))
        # Index chunks are checked against the index dtype asserted on the
        # tensor above (np.intp), not against the dtype of the data.
        self.assertEqual(indices.chunks[i].dtype, np.dtype(np.intp))
        self.assertEqual(indices.chunks[i].index, (i,))

    # The inverse has a known length (20) split by the column chunk size 6.
    inverse_splits = (6, 6, 6, 2)
    self.assertEqual(len(inverse.chunks), len(inverse_splits))
    self.assertEqual(inverse.nsplits, (inverse_splits,))
    for i, size in enumerate(inverse_splits):
        self.assertEqual(inverse.chunks[i].shape, (size,))
        # Same dtype as asserted for the inverse tensor itself; a literal
        # int64 would only agree on platforms where intp is 64-bit.
        self.assertEqual(inverse.chunks[i].dtype, np.dtype(np.intp))
        self.assertEqual(inverse.chunks[i].index, (i,))

    self.assertEqual(len(counts.chunks), 2)
    self.assertEqual(counts.nsplits, ((np.nan, np.nan),))
    for i in range(2):
        self.assertEqual(counts.chunks[i].shape, (np.nan,))
        self.assertEqual(counts.chunks[i].dtype, np.dtype(np.int_))
        self.assertEqual(counts.chunks[i].index, (i,))
def test_unique():
    """Check the shape, dtype and chunk metadata reported by ``unique``.

    Scenarios covered, in order:

    * a NumPy scalar input;
    * a Python float input with ``return_index=True``;
    * an ``axis`` that does not exist on a scalar (expects ``np.AxisError``);
    * a chunked 1-D tensor with ``aggregate_size=2``;
    * a chunked 2-D tensor along ``axis=1`` with every optional output.

    Only metadata (``shape``, ``dtype``, ``chunks``, ``nsplits``, ``index``)
    is inspected, before and after ``tile``.
    """
    # --- NumPy scalar input -------------------------------------------
    x = unique(np.int64(1))

    assert len(x.shape) == 1
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.int64)

    x = tile(x)

    assert len(x.chunks) == 1
    assert len(x.chunks[0].shape) == 1
    assert np.isnan(x.chunks[0].shape[0])
    assert x.chunks[0].dtype == np.dtype(np.int64)

    # --- Python float input, also requesting the indices ---------------
    x, indices = unique(0.1, return_index=True)

    assert len(x.shape) == 1
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.float64)
    assert len(indices.shape) == 1
    assert np.isnan(indices.shape[0])
    assert indices.dtype == np.dtype(np.intp)

    x, indices = tile(x, indices)

    assert len(x.chunks) == 1
    assert len(x.chunks[0].shape) == 1
    assert np.isnan(x.chunks[0].shape[0])
    assert x.chunks[0].dtype == np.dtype(np.float64)
    assert len(indices.chunks) == 1
    assert len(indices.chunks[0].shape) == 1
    assert np.isnan(indices.chunks[0].shape[0])
    assert indices.chunks[0].dtype == np.dtype(np.intp)

    # --- axis=1 does not exist for a scalar input ----------------------
    with pytest.raises(np.AxisError):
        unique(0.1, axis=1)

    # --- chunked 1-D tensor --------------------------------------------
    raw = np.random.randint(10, size=(10), dtype=np.int64)
    a = tensor(raw, chunk_size=4)

    x = unique(a, aggregate_size=2)

    assert len(x.shape) == len(raw.shape)
    assert np.isnan(x.shape[0])
    assert x.dtype == np.dtype(np.int64)

    x = tile(x)

    assert len(x.chunks) == 2
    # nan != nan, so these tuple comparisons can only succeed through the
    # identity shortcut of container equality (same ``np.nan`` object).
    assert x.nsplits == ((np.nan, np.nan),)
    for i in range(2):
        assert x.chunks[i].shape == (np.nan,)
        assert x.chunks[i].dtype == raw.dtype

    # --- chunked 2-D tensor along axis=1, all optional outputs ---------
    raw = np.random.randint(10, size=(10, 20), dtype=np.int64)
    a = tensor(raw, chunk_size=(4, 6))

    x, indices, inverse, counts = unique(
        a, axis=1, aggregate_size=2,
        return_index=True, return_inverse=True, return_counts=True)

    assert x.shape == (10, np.nan)
    assert x.dtype == np.dtype(np.int64)
    assert indices.shape == (np.nan,)
    assert indices.dtype == np.dtype(np.intp)
    assert inverse.shape == (20,)
    assert inverse.dtype == np.dtype(np.intp)
    assert counts.shape == (np.nan,)
    assert counts.dtype == np.dtype(np.int_)

    x, indices, inverse, counts = tile(x, indices, inverse, counts)

    assert len(x.chunks) == 2
    assert x.nsplits == ((10,), (np.nan, np.nan))
    for i in range(2):
        assert x.chunks[i].shape == (10, np.nan)
        assert x.chunks[i].dtype == raw.dtype
        assert x.chunks[i].index == (0, i)

    assert len(indices.chunks) == 2
    assert indices.nsplits == ((np.nan, np.nan),)
    for i in range(2):
        assert indices.chunks[i].shape == (np.nan,)
        # Index chunks are checked against the index dtype asserted on the
        # tensor above (np.intp), not against the dtype of the data.
        assert indices.chunks[i].dtype == np.dtype(np.intp)
        assert indices.chunks[i].index == (i,)

    # The inverse has a known length (20) split by the column chunk size 6.
    inverse_splits = (6, 6, 6, 2)
    assert len(inverse.chunks) == len(inverse_splits)
    assert inverse.nsplits == (inverse_splits,)
    for i, size in enumerate(inverse_splits):
        assert inverse.chunks[i].shape == (size,)
        # Same dtype as asserted for the inverse tensor itself; a literal
        # int64 would only agree on platforms where intp is 64-bit.
        assert inverse.chunks[i].dtype == np.dtype(np.intp)
        assert inverse.chunks[i].index == (i,)

    assert len(counts.chunks) == 2
    assert counts.nsplits == ((np.nan, np.nan),)
    for i in range(2):
        assert counts.chunks[i].shape == (np.nan,)
        assert counts.chunks[i].dtype == np.dtype(np.int_)
        assert counts.chunks[i].index == (i,)
def testUniqueExecution(self):
    """Execute ``unique`` and compare every output against ``np.unique``.

    For chunk sizes 10 and 3 the 1-D case is run with no optional output
    and with several combinations of ``return_index`` / ``return_inverse``
    / ``return_counts``; a 3-D input is then reduced with no axis, axis 1
    and axis 2.  Afterwards a 2-D input with a duplicated column is run
    with ``aggregate_size=3`` along ``axis=1``, and finally a 0/1 tensor
    built from a random tensor is checked to yield exactly ``[0, 1]``.
    """
    rng = np.random.RandomState(0)
    data_1d = rng.randint(10, size=(10, ))

    # Optional-output combinations, in the order they are exercised.
    flag_combos = (
        ('return_index',),
        ('return_inverse',),
        ('return_counts',),
        ('return_index', 'return_inverse', 'return_counts'),
        ('return_index', 'return_counts'),
    )
    # Axis variants for the 3-D input; the empty dict keeps the default.
    axis_variants = ({}, {'axis': 1}, {'axis': 2})

    for chunk_size in (10, 3):
        t_1d = tensor(data_1d, chunk_size=chunk_size)

        # Plain call: a single output tensor.
        got = self.executor.execute_tensor(unique(t_1d), concat=True)[0]
        np.testing.assert_array_equal(got, np.unique(data_1d))

        for flags in flag_combos:
            kwargs = dict.fromkeys(flags, True)
            n_outputs = len(flags) + 1

            outputs = unique(t_1d, **kwargs)
            got = self.executor.execute_tensors(list(outputs))
            want = np.unique(data_1d, **kwargs)

            self.assertEqual(len(got), n_outputs)
            self.assertEqual(len(want), n_outputs)
            for actual, expected in zip(got, want):
                np.testing.assert_array_equal(actual, expected)

        # Fresh 3-D data is drawn on every pass of the chunk-size loop.
        data_3d = rng.randint(10, size=(4, 5, 6))
        t_3d = tensor(data_3d, chunk_size=chunk_size)
        for axis_kw in axis_variants:
            got = self.executor.execute_tensor(
                unique(t_3d, **axis_kw), concat=True)[0]
            np.testing.assert_array_equal(got, np.unique(data_3d, **axis_kw))

    # 2-D input in which columns 0 and 11 are forced to be identical.
    data_2d = rng.randint(10, size=(10, 20))
    shared_column = rng.randint(10, size=(10, ))
    data_2d[:, 0] = shared_column
    data_2d[:, 11] = shared_column

    t_2d = tensor(data_2d, chunk_size=2)
    u_t, idx_t, inv_t, cnt_t = unique(
        t_2d, aggregate_size=3, axis=1,
        return_index=True, return_inverse=True, return_counts=True)
    got_unique, got_idx, got_inv, got_cnt = self.executor.execute_tensors(
        (u_t, idx_t, inv_t, cnt_t))
    want_unique, want_idx, want_cnt = np.unique(
        data_2d, axis=1, return_index=True, return_counts=True)

    # Reorder the executed columns by sorting on all rows before comparing
    # with NumPy -- presumably the aggregated result is not globally sorted.
    row_labels = list(range(got_unique.shape[0]))
    column_order = np.asarray(
        pd.DataFrame(got_unique).sort_values(row_labels, axis=1).columns)

    np.testing.assert_array_equal(got_unique[:, column_order], want_unique)
    np.testing.assert_array_equal(got_idx[column_order], want_idx)
    # The inverse is checked against the unsorted result: indexing with it
    # must rebuild the input.
    np.testing.assert_array_equal(got_unique[:, got_inv], data_2d)
    np.testing.assert_array_equal(got_cnt[column_order], want_cnt)

    # 0/1 tensor obtained by thresholding a random tensor at 0.5.
    uniform = mt.random.RandomState(0).rand(1000, chunk_size=20)
    binary = (uniform > 0.5).astype(np.int32)
    executed = self.executor.execute_tensor(unique(binary), concat=True)[0]
    np.testing.assert_array_equal(np.sort(executed), np.array([0, 1]))