Example #1
0
    def testPartition(self):
        # Graph-level checks for ``partition`` (nothing is executed here):
        # op type before and after tiling, the default field order for a
        # structured dtype, and rejection of invalid arguments.

        # (10, 10) input with chunk_size=(5, 10): two tiled chunks are
        # expected, each a TensorPartition reading a data-source chunk.
        matrix = tensor(np.random.rand(10, 10), chunk_size=(5, 10))
        partitioned = partition(matrix, [4, 9])
        self.assertEqual(type(partitioned.op).__name__, 'TensorPartition')

        tiled = partitioned.tiles()
        self.assertEqual(len(tiled.chunks), 2)
        for chunk in tiled.chunks:
            self.assertEqual(type(chunk.op).__name__, 'TensorPartition')
            source = chunk.inputs[0]
            self.assertEqual(type(source.op).__name__, 'ArrayDataSource')

        # 100 elements with chunk_size=10: tiled chunks are expected to be
        # 'PartitionMerged' ops whose length is unknown (nan).
        vector = tensor(np.random.rand(100), chunk_size=10)
        partitioned = partition(vector, 4)
        self.assertEqual(type(partitioned.op).__name__, 'TensorPartition')

        tiled = partitioned.tiles()
        for chunk in tiled.chunks:
            self.assertEqual(type(chunk.op).__name__, 'PartitionMerged')
            self.assertEqual(chunk.shape, (np.nan, ))

        # Structured dtype without an explicit order: the op is expected to
        # carry every field name, in declaration order.
        record_dtype = [('id', np.int32), ('size', np.int64)]
        records = tensor(np.empty((10, 10), dtype=record_dtype),
                         chunk_size=(10, 5))
        self.assertSequenceEqual(partition(records, 3).op.order,
                                 ['id', 'size'])

        # Invalid arguments, one rejection per case.
        with self.assertRaises(ValueError):
            # ordering by a field the dtype does not have
            partition(records, 4, order=['unknown_field'])

        with self.assertRaises(np.AxisError):
            # axis 1 on a 1-d input
            flat = np.random.rand(100)
            partition(flat, 4, axis=1)

        with self.assertRaises(ValueError):
            # unrecognised selection algorithm name
            flat = np.random.rand(100)
            partition(flat, 4, kind='non_valid_kind')

        with self.assertRaises(ValueError):
            # kth equal to the length of the axis
            flat = np.random.rand(10)
            partition(flat, 10)

        with self.assertRaises(TypeError):
            # kth given as a float tensor
            flat = np.random.rand(10)
            float_kth = tensor([1.0, 2.0])
            partition(flat, float_kth)

        with self.assertRaises(ValueError):
            # kth given as a 2-d tensor
            flat = np.random.rand(10)
            nested_kth = tensor([[1, 2]])
            partition(flat, nested_kth)

        with self.assertRaises(ValueError):
            # negative kth beyond the length of the axis
            flat = np.random.rand(10)
            partition(flat, [-11, 2])
Example #2
0
def test_partition():
    """Check op types, tiling results and argument validation of ``partition``.

    Nothing is executed; only the constructed and tiled graphs are inspected.
    """
    # (10, 10) input with chunk_size=(5, 10): two tiled chunks are expected,
    # each a TensorPartition reading a data-source chunk.
    matrix = tensor(np.random.rand(10, 10), chunk_size=(5, 10))
    result = partition(matrix, [4, 9])
    assert type(result.op).__name__ == 'TensorPartition'

    tiled = tile(result)
    assert len(tiled.chunks) == 2
    for chunk in tiled.chunks:
        assert type(chunk.op).__name__ == 'TensorPartition'
        upstream = chunk.inputs[0]
        assert type(upstream.op).__name__ == 'ArrayDataSource'

    # 100 elements with chunk_size=10: tiled chunks are expected to be
    # 'PartitionMerged' ops whose length is unknown (nan).
    vector = tensor(np.random.rand(100), chunk_size=10)
    result = partition(vector, 4)
    assert type(result.op).__name__ == 'TensorPartition'

    tiled = tile(result)
    for chunk in tiled.chunks:
        assert type(chunk.op).__name__ == 'PartitionMerged'
        assert chunk.shape == (np.nan, )

    # Structured dtype without an explicit order: the op is expected to list
    # every field name, in declaration order.
    fields = [('id', np.int32), ('size', np.int64)]
    records = tensor(np.empty((10, 10), dtype=fields), chunk_size=(10, 5))
    result = partition(records, 3)
    assert result.op.order == ['id', 'size']

    # Invalid arguments, one rejection per case.
    with pytest.raises(ValueError):
        # ordering by a field the dtype does not have
        partition(records, 4, order=['unknown_field'])

    with pytest.raises(np.AxisError):
        # axis 1 on a 1-d input
        flat = np.random.rand(100)
        partition(flat, 4, axis=1)

    with pytest.raises(ValueError):
        # unrecognised selection algorithm name
        flat = np.random.rand(100)
        partition(flat, 4, kind='non_valid_kind')

    with pytest.raises(ValueError):
        # kth equal to the length of the axis
        flat = np.random.rand(10)
        partition(flat, 10)

    with pytest.raises(TypeError):
        # kth given as a float tensor
        flat = np.random.rand(10)
        float_kth = tensor([1.0, 2.0])
        partition(flat, float_kth)

    with pytest.raises(ValueError):
        # kth given as a 2-d tensor
        flat = np.random.rand(10)
        nested_kth = tensor([[1, 2]])
        partition(flat, nested_kth)

    with pytest.raises(ValueError):
        # negative kth beyond the length of the axis
        flat = np.random.rand(10)
        partition(flat, [-11, 2])
Example #3
0
    def testPartitionExecution(self):
        # Executes ``partition`` through ``self.executor`` and compares the
        # outcome with ``np.partition`` (or with a full sort when kth is
        # itself a tensor). Apart from the first case, only the kth positions
        # are compared.

        def run(target):
            # Execute a tensor and return its concatenated result.
            return self.executor.execute_tensor(target, concat=True)[0]

        def seeded_kth(bound, count):
            # Reproducible kth positions drawn from [-bound, bound).
            rs = np.random.RandomState(0)
            return rs.randint(-bound, bound, size=(count, ))

        record_dtype = [('id', np.int32), ('size', np.int64)]
        check = np.testing.assert_array_equal

        # only 1 chunk when axis = -1
        data = np.random.rand(100, 10)
        got = run(partition(tensor(data, chunk_size=10), [1, 8]))
        check(got, np.partition(data, [1, 8]))

        # 1-d chunk
        data = np.random.rand(100)
        kth = seeded_kth(100, 10)
        got = run(partition(tensor(data, chunk_size=10), kth))
        check(got[kth], np.partition(data, kth)[kth])

        # structured dtype (same kth as the 1-d case)
        records = np.empty(100, dtype=record_dtype)
        records['id'] = np.random.randint(1000, size=100, dtype=np.int32)
        records['size'] = np.random.randint(1000, size=100, dtype=np.int64)
        x = tensor(records, chunk_size=10)
        got = run(partition(x, kth, order=['size', 'id']))
        check(got[kth],
              np.partition(records, kth, order=['size', 'id'])[kth])

        # test flatten case (same kth again, applied to the flattened data)
        data = np.random.rand(10, 10)
        got = run(partition(tensor(data, chunk_size=5), kth, axis=None))
        check(got[kth], np.partition(data, kth, axis=None)[kth])

        # test multi-dimension: last axis length 100, then 99
        kth = seeded_kth(10, 3)
        for width in (100, 99):
            data = np.random.rand(10, width)
            got = run(partition(tensor(data, chunk_size=(2, 10)), kth))
            check(got[:, kth], np.partition(data, kth)[:, kth])

        # test 3-d: default axis first, then axis 0 and axis 1
        cube = np.random.rand(20, 25, 28)
        x = tensor(cube, chunk_size=(10, 5, 7))

        kth = seeded_kth(28, 3)
        got = run(partition(x, kth))
        check(got[:, :, kth], np.partition(cube, kth)[:, :, kth])

        for axis, extent in ((0, 20), (1, 25)):
            kth = seeded_kth(extent, 3)
            # Index that selects the kth positions along ``axis``.
            pick = (slice(None), ) * axis + (kth, )
            got = run(partition(x, kth, axis=axis))
            check(got[pick], np.partition(cube, kth, axis=axis)[pick])

        # test multi-dimension with structured type
        records = np.empty((10, 100), dtype=record_dtype)
        records['id'] = np.random.randint(1000, size=(10, 100),
                                          dtype=np.int32)
        records['size'] = np.random.randint(1000, size=(10, 100),
                                            dtype=np.int64)
        x = tensor(records, chunk_size=(3, 10))

        kth = seeded_kth(100, 10)
        # No explicit order, then two explicit field orders.
        for fields in (None, ('size', 'id'), ('size', )):
            if fields is None:
                got = run(partition(x, kth))
                expected = np.partition(records, kth)
            else:
                got = run(partition(x, kth, order=list(fields)))
                expected = np.partition(records, kth, order=list(fields))
            check(got[:, kth], expected[:, kth])

        kth = seeded_kth(10, 5)
        got = run(partition(x, kth, axis=0, order=['size', 'id']))
        check(got[kth],
              np.partition(records, kth, axis=0, order=['size', 'id'])[kth])

        # ``Tensor.partition`` method: the tensor itself is executed after
        # each call, and the second expectation is built from the first
        # executed result.
        data = np.random.rand(10, 12)
        a = tensor(data, chunk_size=(5, 4))

        kth = seeded_kth(12, 2)
        a.partition(kth, axis=1)
        first_pass = run(a)
        check(first_pass[:, kth],
              np.partition(data, kth, axis=1)[:, kth])

        kth = seeded_kth(10, 2)
        a.partition(kth, axis=0)
        second_pass = run(a)
        check(second_pass[kth],
              np.partition(first_pass, kth, axis=0)[kth])

        # test kth which is tensor: compared against a full sort, for a
        # multi-chunk and a single-chunk layout of the same data
        data = np.random.rand(10, 12)
        for chunk_size in ((3, 5), (10, 12)):
            a = tensor(data, chunk_size=chunk_size)
            kth = (mt.random.rand(5) * 24 - 12).astype(int)

            px = partition(a, kth)
            sx = sort(a)

            got = run(px)
            kth_values = run(kth)
            fully_sorted = run(sx)
            check(got[:, kth_values], fully_sorted[:, kth_values])