Ejemplo n.º 1
0
class Test(unittest.TestCase):
    """Execution tests for ``check_non_negative_then_return_value``."""

    def setUp(self) -> None:
        # every scenario below runs through the numpy-backed test executor
        self.executor = ExecutorForTest('numpy')

    def testCheckNonNegativeThenReturnValueExecution(self):
        """Run the check on dense, sparse and DataFrame inputs.

        For each container the same tileable is passed as the first two
        arguments. Data without negative entries must come back unchanged,
        and data holding a single ``-1`` must raise ``ValueError`` when the
        result is executed.
        """
        # dense ndarray without negative entries
        raw = np.random.randint(10, size=(10, 5))
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]
        np.testing.assert_array_equal(result, raw)

        # dense ndarray with one negative entry
        raw = raw.copy()
        raw[1, 3] = -1
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]

        # sparse CSR input without negative entries
        raw = sps.random(10, 5, density=.3, format='csr')
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]
        # ``toarray()`` on both sides: unlike the ``.A`` shortcut it is
        # available on SciPy sparse matrices and sparse arrays alike
        np.testing.assert_array_equal(result.toarray(), raw.toarray())

        # sparse CSR input with one negative entry
        raw = raw.copy()
        raw[1, 3] = -1
        c = mt.tensor(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]

        # DataFrame without negative entries
        raw = pd.DataFrame(np.random.rand(10, 4))
        c = md.DataFrame(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        result = self.executor.execute_tileable(r, concat=True)[0]

        pd.testing.assert_frame_equal(result, raw)

        # DataFrame with one negative entry
        raw = raw.copy()
        raw.iloc[1, 3] = -1
        c = md.DataFrame(raw, chunk_size=(3, 2))

        r = check_non_negative_then_return_value(c, c, 'sth')
        with self.assertRaises(ValueError):
            _ = self.executor.execute_tileable(r, concat=True)[0]
Ejemplo n.º 2
0
class Test(TestBase):
    def setUp(self):
        """Create the numpy-backed executor and set ``options.chunk_size`` to 10.

        The chunk size in effect beforehand is stored on ``self.old_chunk``.
        """
        self.executor = ExecutorForTest('numpy')
        # the right-hand side is evaluated before either target is assigned,
        # so the previous value is captured before it is overwritten
        self.old_chunk, options.chunk_size = options.chunk_size, 10

    def tearDown(self):
        """Write the value stored on ``self.old_chunk`` back to ``options.chunk_size``."""
        saved_chunk = self.old_chunk
        options.chunk_size = saved_chunk

    def testBoolIndexingExecution(self):
        """Select elements of a chunked tensor with boolean masks.

        Covers a mask with the tensor's full shape, a mask over the two
        leading axes only, and the memory layout of the result when the
        source array is Fortran-ordered.
        """
        run = self.executor.execute_tensor
        layout_flags = ('C_CONTIGUOUS', 'F_CONTIGUOUS')

        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=3)

        # mask with the same shape as the tensor
        full_mask = t < .5
        selected = t[full_mask]
        mock_sizes = run(selected, mock=True)
        chunks = run(selected)

        mocked_total = sum(size[0] for size in mock_sizes)
        self.assertEqual(mocked_total, t.nbytes)
        # executed without concat=True: join the pieces here and compare
        # the values irrespective of their order
        actual_sorted = np.sort(np.concatenate(chunks))
        np.testing.assert_array_equal(actual_sorted, np.sort(data[data < .5]))

        # mask covering only the first two axes
        leading_mask = tensor(data[:, :, 0, 0], chunk_size=3) < .5
        picked = t[leading_mask]
        actual = run(picked, concat=True)[0]

        wanted = data[data[:, :, 0, 0] < .5]
        self.assertEqual(sum(block.size for block in actual), wanted.size)
        self.assertEqual(actual.shape, wanted.shape)

        # Fortran-ordered source: contiguity flags must agree with NumPy's
        data = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        t = tensor(data, chunk_size=3)

        leading_mask = tensor(data[:, :, 0, 0], chunk_size=3) < .5
        picked = t[leading_mask]
        actual = run(picked, concat=True)[0]
        wanted = data[data[:, :, 0, 0] < .5].copy('A')

        for flag in layout_flags:
            self.assertEqual(actual.flags[flag], wanted.flags[flag])

    def testFancyIndexingNumpyExecution(self):
        """Fancy-index a chunked tensor with in-memory integer indices.

        Every selection is executed and compared with the identical indexing
        expression applied to the source ndarray.
        """
        run = self.executor.execute_tensor
        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=(2, 3, 2, 3))

        # one unordered index list with a repeated entry, on the first axis
        rows = [9, 10, 3, 1, 8, 10]
        picked = t[rows]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[rows])

        # a permutation on the last axis, after a slice and an ellipsis
        perm = np.random.permutation(8)
        picked = t[:2, ..., perm]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[:2, ..., perm])

        # an index list on the third axis, followed by a slice
        picks = [1, 3, 9, 10]
        picked = t[..., picks, :5]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[..., picks, :5])

        # two index lists separated by a full slice
        first = [8, 10, 3, 1, 9, 10]
        third = [1, 3, 9, 10, 2, 7]
        picked = t[first, :, third]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[first, :, third])

        first = [1, 3, 5, 7, 9, 10]
        third = [1, 9, 9, 10, 2, 7]
        ordered = t[first, :, third]
        got = run(ordered, concat=True)[0]
        np.testing.assert_array_equal(got, data[first, :, third])
        # fancy index is ordered, no concat required
        self.assertGreater(len(get_tiled(ordered).nsplits[0]), 1)

        # two 2-d index lists separated by a full slice
        first = [[8, 10, 3], [1, 9, 10]]
        third = [[1, 3, 9], [10, 2, 7]]
        picked = t[first, :, third]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[first, :, third])

        # integer index combined with a 2-d and a 1-d index list
        second = [[1, 3], [3, 7], [7, 7]]
        last = [1, 9]
        picked = t[0, second, :, last]
        got = run(picked, concat=True)[0]
        np.testing.assert_array_equal(got, data[0, second, :, last])

    def testFancyIndexingTensorExecution(self):
        """Fancy-index a chunked tensor with indices that are tensors themselves.

        Each result is compared with NumPy indexing of the raw ndarray using
        the raw (list / ndarray) form of the same indices.
        """
        # test fancy index of type tensor

        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        # single 1-d index tensor on the first axis, split into chunks of 4
        raw_index = [8, 10, 3, 1, 9, 10]
        index = tensor(raw_index, chunk_size=4)
        arr2 = arr[index]

        res = self.executor.execute_tensor(arr2, concat=True)[0]
        np.testing.assert_array_equal(res, raw[raw_index])

        # permutation tensor on the last axis, after a slice and an ellipsis
        raw_index = np.random.permutation(8)
        index = tensor(raw_index, chunk_size=3)
        arr3 = arr[:2, ..., index]

        res = self.executor.execute_tensor(arr3, concat=True)[0]
        np.testing.assert_array_equal(res, raw[:2, ..., raw_index])

        # index tensor created without an explicit chunk_size
        raw_index = [1, 3, 9, 10]
        index = tensor(raw_index)
        arr4 = arr[..., index, :5]

        res = self.executor.execute_tensor(arr4, concat=True)[0]
        np.testing.assert_array_equal(res, raw[..., raw_index, :5])

        # two index tensors with different chunk sizes, separated by a slice
        raw_index1 = [8, 10, 3, 1, 9, 10]
        raw_index2 = [1, 3, 9, 10, 2, 7]
        index1 = tensor(raw_index1, chunk_size=4)
        index2 = tensor(raw_index2, chunk_size=3)
        arr5 = arr[index1, :, index2]

        res = self.executor.execute_tensor(arr5, concat=True)[0]
        np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2])

        # same layout, with an ascending first index list
        raw_index1 = [1, 3, 5, 7, 9, 10]
        raw_index2 = [1, 9, 9, 10, 2, 7]
        index1 = tensor(raw_index1, chunk_size=3)
        index2 = tensor(raw_index2, chunk_size=4)
        arr6 = arr[index1, :, index2]

        res = self.executor.execute_tensor(arr6, concat=True)[0]
        np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2])

        # 2-d index tensors
        raw_index1 = [[8, 10, 3], [1, 9, 10]]
        raw_index2 = [[1, 3, 9], [10, 2, 7]]
        index1 = tensor(raw_index1)
        index2 = tensor(raw_index2, chunk_size=2)
        arr7 = arr[index1, :, index2]

        res = self.executor.execute_tensor(arr7, concat=True)[0]
        np.testing.assert_array_equal(res, raw[raw_index1, :, raw_index2])

        # integer index combined with a 2-d and a 1-d index tensor
        raw_index1 = [[1, 3], [3, 7], [7, 7]]
        raw_index2 = [1, 9]
        index1 = tensor(raw_index1, chunk_size=(2, 1))
        index2 = tensor(raw_index2)
        arr8 = arr[0, index1, :, index2]

        res = self.executor.execute_tensor(arr8, concat=True)[0]
        np.testing.assert_array_equal(res, raw[0, raw_index1, :, raw_index2])

        # index produced by another tensor operation (argmax) on the same data
        raw_a = np.random.rand(30, 30)
        a = tensor(raw_a, chunk_size=(13, 17))
        b = a.argmax(axis=0)
        c = a[b, arange(30)]
        res = self.executor.execute_tensor(c, concat=True)[0]

        np.testing.assert_array_equal(
            res, raw_a[raw_a.argmax(axis=0),
                       np.arange(30)])

        # test one chunk
        arr = tensor(raw, chunk_size=20)

        raw_index = [8, 10, 3, 1, 9, 10]
        index = tensor(raw_index, chunk_size=20)
        arr9 = arr[index]

        res = self.executor.execute_tensor(arr9, concat=True)[0]
        np.testing.assert_array_equal(res, raw[raw_index])

        raw_index1 = [[1, 3], [3, 7], [7, 7]]
        raw_index2 = [1, 9]
        index1 = tensor(raw_index1)
        index2 = tensor(raw_index2)
        arr10 = arr[0, index1, :, index2]

        res = self.executor.execute_tensor(arr10, concat=True)[0]
        np.testing.assert_array_equal(res, raw[0, raw_index1, :, raw_index2])

        # test order
        raw = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        arr = tensor(raw, chunk_size=(2, 3, 2, 3))

        raw_index = [8, 10, 3, 1, 9, 10]
        index = tensor(raw_index, chunk_size=4)
        arr11 = arr[index]

        res = self.executor.execute_tensor(arr11, concat=True)[0]
        expected = raw[raw_index].copy('A')
        np.testing.assert_array_equal(res, expected)
        # contiguity flags of the result must agree with the NumPy result
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

    def testSliceExecution(self):
        """Slice dense and sparse chunked tensors and compare with NumPy.

        Each index expression is built once with ``np.s_`` and applied to
        both the tensor and the raw data. The last part checks the memory
        layout of the result for Fortran-ordered input.
        """
        run = self.executor.execute_tensor
        layout_flags = ('C_CONTIGUOUS', 'F_CONTIGUOUS')
        stepped = np.s_[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]

        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=3)

        # positive and negative steps on every axis
        got = run(t[stepped], concat=True)[0]
        np.testing.assert_array_equal(got, data[stepped])

        # integer index followed by an open-ended slice
        row_tail = np.s_[-4, 2:]
        got = run(t[row_tail], concat=True)[0]
        np.testing.assert_equal(got, data[row_tail])

        # sparse input
        sparse = sps.random(12, 14, density=.1)
        t = tensor(sparse, chunk_size=3)

        reverse_steps = np.s_[-1:-9:-2, 12:-11:-4]
        got = run(t[reverse_steps], concat=True)[0]
        np.testing.assert_equal(got.toarray(),
                                sparse.toarray()[reverse_steps])

        # test order
        data = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        t = tensor(data, chunk_size=3)

        got = run(t[stepped], concat=True)[0]
        wanted = data[stepped].copy('A')

        np.testing.assert_array_equal(got, wanted)
        for flag in layout_flags:
            self.assertEqual(got.flags[flag], wanted.flags[flag])

        # slice running past the axis length, plus a new axis
        with_newaxis = np.s_[0:13, :, None]
        got = run(t[with_newaxis], concat=True)[0]
        wanted = data[with_newaxis].copy('A')

        np.testing.assert_array_equal(got, wanted)
        for flag in layout_flags:
            self.assertEqual(got.flags[flag], wanted.flags[flag])

    def testMixedIndexingExecution(self):
        """Combine slices, boolean masks, new axes and fancy indices in one index.

        Results are compared with the same index expression applied to the
        raw ndarray.
        """
        rs = np.random.RandomState(0)
        raw = rs.random((11, 8, 12, 13))
        arr = tensor(raw, chunk_size=3)

        # slice with negative step + boolean tensor + new axis + ellipsis + slice
        raw_cond = raw[0, :, 0, 0] < .5
        cond = tensor(raw[0, :, 0, 0], chunk_size=3) < .5
        arr2 = arr[10::-2, cond, None, ..., :5]
        size_res = self.executor.execute_tensor(arr2, mock=True)
        res = self.executor.execute_tensor(arr2, concat=True)[0]

        # the mocked sizes are compared against a shape in which the boolean
        # axis keeps the full length of the condition
        new_shape = list(arr2.shape)
        new_shape[1] = cond.shape[0]
        self.assertEqual(sum(s[0] for s in size_res),
                         int(np.prod(new_shape) * arr2.dtype.itemsize))
        np.testing.assert_array_equal(res, raw[10::-2, raw_cond, None,
                                               ..., :5])

        # NOTE: b_raw comes from the global np.random state, not the seeded rs
        b_raw = np.random.random(8)
        raw_cond = b_raw < .5
        # the same condition once as an ndarray and once as a tensor
        conds = [raw_cond, tensor(b_raw, chunk_size=2) < .5]
        for cond in conds:
            arr3 = arr[-2::-3, cond, ...]
            res = self.executor.execute_tensor(arr3, concat=True)[0]

            np.testing.assert_array_equal(res, raw[-2::-3, raw_cond, ...])

        # test multiple bool index and fancy index
        # both masks have exactly five True entries, matching the length of f3
        cond1 = np.zeros(11, dtype=bool)
        cond1[rs.permutation(11)[:5]] = True
        cond2 = np.zeros(12, dtype=bool)
        cond2[rs.permutation(12)[:5]] = True
        # NOTE: f3 also comes from the global np.random state
        f3 = np.random.randint(13, size=5)

        expected = raw[cond1, ..., cond2, f3]

        t = arr[cond1, ..., cond2, f3]
        res = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_array_equal(res, expected)

        # same index with tensor operands, run through the executor returned
        # by _create_test_context while its context is active
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            t = arr[tensor(cond1), ..., tensor(cond2), tensor(f3)]
            res = executor.execute_tensors([t])[0]
            np.testing.assert_array_equal(res, expected)

    def testSetItemExecution(self):
        """Assign into a chunked tensor and compare with NumPy assignment.

        Covers slice targets with scalar and tensor values, Fortran-ordered
        data, boolean-mask targets, and a value whose shape does not fit the
        target (expects ``ValueError``).
        """
        rs = np.random.RandomState(0)

        # data keeps a reference to the first array; raw is rebound to copies
        raw = data = rs.randint(0, 10, size=(11, 8, 12, 13))
        arr = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # scalar assigned to a slice / integer index combination
        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        raw = data
        shape = raw[idx].shape

        arr2 = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # float32 tensor value whose last dimension is 1
        replace = rs.randint(10, 20, size=shape[:-1] + (1, )).astype('f4')
        arr2[idx] = tensor(replace, chunk_size=4)
        res = self.executor.execute_tensor(arr2, concat=True)[0]

        raw[idx] = replace
        np.testing.assert_array_equal(res, raw)

        # Fortran-ordered data: values and contiguity flags must match NumPy
        raw = np.asfortranarray(np.random.randint(0, 10, size=(11, 8, 12, 13)))
        arr = tensor(raw.copy('A'), chunk_size=3)
        raw = raw.copy('A')

        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        # test bool indexing set
        # NOTE: raw is bound to data itself here (no copy), so the in-place
        # assignment below also changes data
        raw = data

        arr = tensor(raw.copy(), chunk_size=3)
        raw1 = rs.rand(11)
        arr[tensor(raw1, chunk_size=4) < 0.6, 2:7] = 3
        res = self.executor.execute_tileable(arr, concat=True)[0]

        raw[raw1 < 0.6, 2:7] = 3
        np.testing.assert_array_equal(res, raw)

        # two boolean masks, one per axis
        raw = np.random.randint(3, size=10).astype(np.int64)
        raw2 = np.arange(3)

        arr = zeros((10, 3))
        arr[tensor(raw) == 1, tensor(raw2) == 1] = 1
        res = self.executor.execute_tileable(arr, concat=True)[0]

        expected = np.zeros((10, 3))
        expected[raw == 1, raw2 == 1] = 1
        np.testing.assert_array_equal(res, expected)

        # NOTE(review): the executor returned here is not used below; the
        # block runs through self.executor instead - confirm this is intended
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw = data

            arr = tensor(raw.copy(), chunk_size=3)
            raw1 = rs.rand(11)
            # one row of replacement data per True entry of the mask
            set_data = rs.rand((raw1 < 0.8).sum(), 8, 12, 13)
            arr[tensor(raw1, chunk_size=4) < 0.8] = tensor(set_data)

            res = self.executor.execute_tileables([arr])[0]

            raw[raw1 < 0.8] = set_data
            np.testing.assert_array_equal(res, raw)

        # test error
        with self.assertRaises(ValueError):
            t = tensor(raw, chunk_size=3)
            t[0, 0, 0, 0] = zeros(2, chunk_size=10)
            _ = self.executor.execute_tensor(t)

    def testSetItemStructuredExecution(self):
        """Assign into a tensor with a nested structured dtype.

        The same sequence of assignments is applied to the tensor and to a
        NumPy array; dtype, shape and content must match afterwards.
        """
        nested_type = np.dtype([('a', np.int16), ('b', np.int64)])
        rec_type = np.dtype([('a', np.int32), ('b', np.double),
                             ('c', nested_type)])

        def assignments():
            # (index, value) pairs in application order; built anew on each
            # call so the two targets never share value objects
            return [
                ((slice(1, 4), 1), (3, 4., (5, 6))),
                ((slice(1, 4), 2), 8),
                (slice(1, 3), np.arange(5)),
                (slice(2, 4), np.arange(10).reshape(2, 5)),
                (0, np.arange(5)),
            ]

        raw = np.zeros((4, 5), dtype=rec_type)
        arr = tensor(raw.copy(), chunk_size=3)

        for key, value in assignments():
            arr[key] = value

        for key, value in assignments():
            raw[key] = value

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(arr.dtype, raw.dtype)
        self.assertEqual(arr.shape, raw.shape)
        np.testing.assert_array_equal(res, raw)

    def testTakeExecution(self):
        """Compare ``take`` (method and function form) with ``np.take``.

        Also checks that an ``out`` tensor of length 4 is rejected with
        ``ValueError`` for three indices, and that a length-4 ``out`` tensor
        receives the result for four indices.
        """
        run = self.executor.execute_tensor
        source = np.random.rand(10, 20, 30)
        t = tensor(source, chunk_size=10)

        # method form, no axis given
        flat_positions = [4, 1, 2, 6, 200]
        taken = t.take(flat_positions)
        got = run(taken, concat=True)[0]
        np.testing.assert_array_equal(got, np.take(source, flat_positions))

        # function form along axis 1
        axis1_positions = [5, 19, 2, 13]
        taken = take(t, axis1_positions, axis=1)
        got = run(taken, concat=True)[0]
        np.testing.assert_array_equal(
            got, np.take(source, axis1_positions, axis=1))

        # out tensor of length 4 for three indices
        with self.assertRaises(ValueError):
            take(t, [1, 3, 4], out=tensor(np.random.rand(4)))

        # out tensor of length 4 for four indices: executing ``out`` yields the result
        target = tensor([1, 2, 3, 4])
        out_positions = [4, 19, 2, 8]
        a = take(t, out_positions, out=target)

        got = run(target, concat=True)[0]
        np.testing.assert_array_equal(got, np.take(source, out_positions))

    def testCompressExecution(self):
        """Compare ``compress`` with ``np.compress``.

        Covers integer and boolean conditions, both axes, the flattened form,
        a condition that is too long for the axis (expects ``AxisError``), and
        the memory layout of the result for Fortran-ordered input with and
        without an ``out`` argument.
        """
        data = np.array([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        # integer condition along axis 0
        t = compress([0, 1], a, axis=0)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([0, 1], data, axis=0)
        np.testing.assert_array_equal(res, expected)

        # integer condition along axis 1
        t = compress([0, 1], a, axis=1)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([0, 1], data, axis=1)
        np.testing.assert_array_equal(res, expected)

        # method form without axis
        t = a.compress([0, 1, 1])

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([0, 1, 1], data)
        np.testing.assert_array_equal(res, expected)

        # boolean condition along axis 0
        t = compress([False, True, True], a, axis=0)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([False, True, True], data, axis=0)
        np.testing.assert_array_equal(res, expected)

        # boolean condition along axis 1
        t = compress([False, True], a, axis=1)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([False, True], data, axis=1)
        np.testing.assert_array_equal(res, expected)

        # ``AxisError`` lives in ``numpy.exceptions`` on recent NumPy and is
        # no longer exported from the top-level namespace in NumPy 2; fall
        # back to ``np.AxisError`` where ``numpy.exceptions`` does not exist.
        axis_error = getattr(np, 'exceptions', np).AxisError
        # three-entry condition on an axis of length 2
        with self.assertRaises(axis_error):
            compress([0, 1, 1], a, axis=1)

        # test order
        data = np.asfortranarray([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        t = compress([0, 1, 1], a, axis=0)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([0, 1, 1], data, axis=0)
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

        # Fortran-ordered ``out`` argument on both sides
        t = compress([0, 1, 1],
                     a,
                     axis=0,
                     out=tensor(np.empty((2, 2), order='F', dtype=int)))

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.compress([0, 1, 1],
                               data,
                               axis=0,
                               out=np.empty((2, 2), order='F', dtype=int))
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

    def testExtractExecution(self):
        """Compare ``extract`` with ``np.extract`` for a ``mod(x, 3) == 0`` condition."""
        source = np.arange(12).reshape((3, 4))
        a = tensor(source, chunk_size=2)

        divisible_by_three = mod(a, 3) == 0
        extracted = extract(divisible_by_three, a)

        got = self.executor.execute_tensor(extracted, concat=True)[0]
        wanted = np.extract(np.mod(source, 3) == 0, source)
        np.testing.assert_array_equal(got, wanted)

    def testChooseExecution(self):
        """Compare ``choose`` with ``np.choose``.

        Covers the default mode, ``mode='clip'`` and ``mode='wrap'``, scalar
        choices, broadcasting choices, and the memory layout of the result
        for Fortran-ordered inputs with and without ``out``.
        """
        options.chunk_size = 2
        run = self.executor.execute_tensor
        layout_flags = ('C_CONTIGUOUS', 'F_CONTIGUOUS')

        table = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23],
                 [30, 31, 32, 33]]
        # selector 4 is out of range for four choices:
        # 'clip' maps it to 3 (4-1), 'wrap' maps it to (4 mod 4)
        scenarios = [
            ([2, 3, 1, 0], {}),
            ([2, 4, 1, 0], {'mode': 'clip'}),
            ([2, 4, 1, 0], {'mode': 'wrap'}),
        ]
        for selector, mode_kwargs in scenarios:
            chosen = choose(selector, table, **mode_kwargs)
            wanted = np.choose(selector, table, **mode_kwargs)

            got = run(chosen, concat=True)[0]
            np.testing.assert_array_equal(got, wanted)

        # 2-d selector over two scalar choices
        selector = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
        scalars = [-10, 10]

        chosen = choose(selector, scalars)
        wanted = np.choose(selector, scalars)

        got = run(chosen, concat=True)[0]
        np.testing.assert_array_equal(got, wanted)

        # selector and choices of different, mutually broadcastable shapes
        selector = np.array([0, 1]).reshape((2, 1, 1))
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))

        chosen = choose(selector, (c1, c2))
        wanted = np.choose(selector, (c1, c2))

        got = run(chosen, concat=True)[0]
        np.testing.assert_array_equal(got, wanted)

        # test order
        selector = np.array([0, 1]).reshape((2, 1, 1), order='F')
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1), order='F')
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5), order='F')

        chosen = choose(selector, (c1, c2))
        wanted = np.choose(selector, (c1, c2))

        got = run(chosen, concat=True)[0]
        np.testing.assert_array_equal(got, wanted)
        for flag in layout_flags:
            self.assertEqual(got.flags[flag], wanted.flags[flag])

        # Fortran-ordered ``out`` with the shape of the previous result
        out_shape = got.shape
        chosen = choose(selector, (c1, c2),
                        out=tensor(np.empty(out_shape, order='F')))
        wanted = np.choose(selector, (c1, c2),
                           out=np.empty(out_shape, order='F'))

        got = run(chosen, concat=True)[0]
        np.testing.assert_array_equal(got, wanted)
        for flag in layout_flags:
            self.assertEqual(got.flags[flag], wanted.flags[flag])

    def testUnravelExecution(self):
        """Compare stacked ``unravel_index`` output with NumPy for shape (7, 6)."""
        flat_indices = tensor([22, 41, 37], chunk_size=1)
        coords = unravel_index(flat_indices, (7, 6))
        stacked = stack(coords)

        got = self.executor.execute_tensor(stacked, concat=True)[0]
        wanted = np.stack(np.unravel_index([22, 41, 37], (7, 6)))

        np.testing.assert_array_equal(got, wanted)

    def testNonzeroExecution(self):
        """Compare ``nonzero`` (function and method form) with ``np.nonzero``.

        The per-axis index arrays are joined with ``hstack`` on both sides
        before comparison.
        """
        run = self.executor.execute_tensor
        grid = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]])
        x = tensor(grid, chunk_size=2)

        # function form on the tensor itself
        joined = hstack(nonzero(x))
        got = run(joined, concat=True)[0]
        np.testing.assert_array_equal(got, np.hstack(np.nonzero(grid)))

        # method form on a comparison result
        joined = hstack((x > 1).nonzero())
        got = run(joined, concat=True)[0]
        np.testing.assert_array_equal(got, np.hstack(np.nonzero(grid > 1)))

    def testFlatnonzeroExecution(self):
        """Compare ``flatnonzero`` on ``arange(-2, 3)`` with ``np.flatnonzero``."""
        values = arange(-2, 3, chunk_size=2)
        positions = flatnonzero(values)

        got = self.executor.execute_tensor(positions, concat=True)[0]
        wanted = np.flatnonzero(np.arange(-2, 3))

        np.testing.assert_equal(got, wanted)

    def testFillDiagonalExecution(self):
        """Compare ``fill_diagonal`` with ``np.fill_diagonal``.

        2-d inputs (tall, square, wide and sparse) are checked with one and
        with several chunks, with and without ``wrap``, and with scalar, list
        and tensor values. A 3-d input is checked the same way, including a
        0-d tensor value and a value longer than the diagonal.
        """

        def to_dense(x):
            # Objects exposing ``nnz`` are treated as sparse and densified;
            # ``toarray()`` is used rather than the ``.A`` shortcut because it
            # is available on SciPy sparse matrices and sparse arrays alike.
            # Dense input is copied so the source stays untouched.
            if hasattr(x, 'nnz'):
                return x.toarray()
            return x.copy()

        def check(raw, chunk_size, val, np_val=None, **kw):
            # Fill the diagonal of a tensor built from ``raw`` and compare
            # with NumPy filling a dense copy. ``np_val`` is the NumPy-side
            # value when ``val`` is not directly usable there (a tensor) or
            # is given in a different shape; ``kw`` goes to both calls.
            t = tensor(raw, chunk_size=chunk_size)
            fill_diagonal(t, val, **kw)
            res = self.executor.execute_tensor(t, concat=True)[0]

            expected = to_dense(raw)
            np.fill_diagonal(expected, val if np_val is None else np_val, **kw)
            np.testing.assert_array_equal(np.asarray(res), expected)

        # 2-d
        raws = [
            np.random.rand(30, 11),
            np.random.rand(15, 15),
            np.random.rand(11, 30),
            sps.random(30, 11, density=0.1, format='csr')
        ]

        for raw in raws:
            # 1 chunk, wrap left at its default and wrap=True
            check(raw, 30, 1)
            check(raw, 30, 1, wrap=True)

            # multiple chunks, wrap left at its default
            check(raw, (12, 4), 1)
            check(raw, (4, 12), 1)
            # val with list type
            check(raw, (12, 4), [1, 2, 3])
            # val with tensor type
            check(raw, (12, 4), tensor([1, 2, 3]), np_val=[1, 2, 3])

            # multiple chunks, wrap=True
            check(raw, (12, 4), 1, wrap=True)
            check(raw, (4, 12), 1, wrap=True)
            # val with list type
            check(raw, (12, 4), [1, 2, 3], wrap=True)
            # val with 2-d tensor type, compared against the flattened list
            check(raw, (12, 4), tensor([[1, 2], [3, 4]]),
                  np_val=[1, 2, 3, 4], wrap=True)

        # 3-d
        raw = np.random.rand(11, 11, 11)

        # wrap = True does not take effect when ndim > 2: NumPy must give
        # the same answer either way
        expected = raw.copy()
        np.fill_diagonal(expected, 1)
        expected2 = raw.copy()
        np.fill_diagonal(expected2, 1, wrap=True)
        np.testing.assert_array_equal(expected, expected2)

        # test 1 chunk
        check(raw, 30, 1)
        check(raw, 30, 1, wrap=True)

        # test multiple chunk
        check(raw, (3, 4, 5), 1)
        check(raw, (3, 4, 5), 1, wrap=True)

        # test val with list type (nested on the tensor side, flat for NumPy)
        check(raw, (3, 4, 5), [[1, 2], [3, 4]], np_val=[1, 2, 3, 4])

        # test val with tensor type
        check(raw, (3, 4, 5), tensor([1, 2, 3]), np_val=[1, 2, 3])

        # test val with tensor type which ndim == 0
        check(raw, (3, 4, 5), tensor([1, 2, 3]).sum(), np_val=6)

        # test val with ndarray type which size is too long
        check(raw, (3, 4, 5), np.arange(20))
Ejemplo n.º 3
0
class Test(TestBase):
    def setUp(self):
        """Run the base-class fixture setup, then attach a fresh test executor."""
        super().setUp()
        executor = ExecutorForTest()
        self.executor = executor

    @require_cudf
    def testToGPUExecution(self):
        """Move a chunked DataFrame and a Series to GPU and compare with pandas.

        The executed results must be cudf objects whose pandas conversion
        equals the original pandas data.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        raw_df = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))

        # DataFrame: converted with the module-level ``to_gpu`` function
        gpu_df = execute(to_gpu(from_pandas_df(raw_df, chunk_size=(13, 21))))
        self.assertIsInstance(gpu_df, cudf.DataFrame)
        pd.testing.assert_frame_equal(gpu_df.to_pandas(), raw_df)

        # Series: converted with the ``to_gpu`` method of the series itself
        raw_series = raw_df.iloc[:, 0]
        gpu_series = execute(from_pandas_series(raw_series).to_gpu())
        self.assertIsInstance(gpu_series, cudf.Series)
        pd.testing.assert_series_equal(gpu_series.to_pandas(), raw_series)

    @require_cudf
    def testToCPUExecution(self):
        """Round-trip a DataFrame and a Series through GPU and back to CPU.

        After ``to_gpu`` followed by ``to_cpu`` the executed results must be
        plain pandas objects equal to the original data.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        raw_df = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))

        # DataFrame round trip
        mars_df = from_pandas_df(raw_df, chunk_size=(13, 21))
        cpu_df = execute(to_cpu(to_gpu(mars_df)))
        self.assertIsInstance(cpu_df, pd.DataFrame)
        pd.testing.assert_frame_equal(cpu_df, raw_df)

        # Series round trip
        raw_series = raw_df.iloc[:, 0]
        # NOTE(review): a two-element chunk_size is passed for a one-dimensional
        # series; kept unchanged -- confirm this is intended.
        mars_series = from_pandas_series(raw_series, chunk_size=(13, 21))
        cpu_series = execute(to_cpu(to_gpu(mars_series)))
        self.assertIsInstance(cpu_series, pd.Series)
        pd.testing.assert_series_equal(cpu_series, raw_series)

    def testRechunkExecution(self):
        """Rechunk DataFrame, Series and Index objects and verify the data is unchanged."""
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # DataFrame with default labels, rechunked to a (3, 4) layout
        raw_df = pd.DataFrame(np.random.rand(8, 10))
        mars_df = from_pandas_df(pd.DataFrame(raw_df), chunk_size=3)
        pd.testing.assert_frame_equal(raw_df, execute(mars_df.rechunk((3, 4))))

        # DataFrame with a random integer index and random bytes as column labels
        raw_df = pd.DataFrame(np.random.rand(10, 10), index=np.random.randint(-100, 100, size=(10,)),
                              columns=[np.random.bytes(10) for _ in range(10)])
        mars_df = from_pandas_df(raw_df)
        pd.testing.assert_frame_equal(raw_df, execute(mars_df.rechunk(5)))

        # Series rechunked to chunks of 3 elements, then of 1 element
        raw_series = pd.Series(np.random.rand(10,))
        mars_series = from_pandas_series(raw_series)
        for size in (3, 1):
            pd.testing.assert_series_equal(raw_series, execute(mars_series.rechunk(size)))

        # Index rechunked to chunks of 3 elements, then of 1 element
        raw_index = pd.Index(np.random.rand(10,))
        mars_index = from_pandas_index(raw_index)
        for size in (3, 1):
            pd.testing.assert_index_equal(raw_index, execute(mars_index.rechunk(size)))

    def testResetIndexExecution(self):
        """Check ``reset_index`` for DataFrame and Series against pandas.

        Covers plain and multi-level indexes, multi-level columns, the
        ``drop``/``level``/``col_level``/``col_fill``/``name`` arguments, and
        inputs produced by adding two differently indexed objects, which are
        run through a separate session.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        records = [('bird',    389.0),
                   ('bird',     24.0),
                   ('mammal',   80.5),
                   ('mammal', np.nan)]

        # DataFrame with a flat string index: (from_pandas_df kwargs, reset_index kwargs)
        raw = pd.DataFrame(records,
                           index=['falcon', 'parrot', 'lion', 'monkey'],
                           columns=('class', 'max_speed'))
        flat_cases = [
            ({}, {}),
            ({'chunk_size': 2}, {}),
            ({'chunk_size': 1}, {'drop': True}),
        ]
        for build_kwargs, reset_kwargs in flat_cases:
            mars_df = from_pandas_df(raw, **build_kwargs)
            pd.testing.assert_frame_equal(execute(df_reset_index(mars_df, **reset_kwargs)),
                                          raw.reset_index(**reset_kwargs))

        # DataFrame with a two-level index; only the 'class' level is reset
        two_level_index = pd.MultiIndex.from_tuples([('bird', 'falcon'),
                                                     ('bird', 'parrot'),
                                                     ('mammal', 'lion'),
                                                     ('mammal', 'monkey')],
                                                    names=['class', 'name'])
        raw = pd.DataFrame(records,
                           index=two_level_index,
                           columns=('type', 'max_speed'))
        mars_df = from_pandas_df(raw, chunk_size=1)
        pd.testing.assert_frame_equal(execute(df_reset_index(mars_df, level='class')),
                                      raw.reset_index(level='class'))

        # same data with two-level columns, placing the new column via col_level/col_fill
        raw.columns = pd.MultiIndex.from_tuples([('speed', 'max'), ('species', 'type')])
        mars_df = from_pandas_df(raw, chunk_size=2)
        level_kwargs = dict(level='class', col_level=1, col_fill='species')
        pd.testing.assert_frame_equal(execute(df_reset_index(mars_df, **level_kwargs)),
                                      raw.reset_index(**level_kwargs))

        # Series: resetting with a new name yields a frame, dropping yields a series
        raw_series = pd.Series([1, 2, 3, 4], name='foo',
                               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

        mars_series = from_pandas_series(raw_series)
        pd.testing.assert_frame_equal(execute(series_reset_index(mars_series, name='bar')),
                                      raw_series.reset_index(name='bar'))

        mars_series = from_pandas_series(raw_series, chunk_size=2)
        pd.testing.assert_series_equal(execute(series_reset_index(mars_series, drop=True)),
                                       raw_series.reset_index(drop=True))

        # Inputs built by adding two objects with different indexes, run in a session
        sess = new_session()

        def check_sum_reset(mars_sum, left_raw, right_raw):
            result = sess.run(mars_sum.reset_index())
            pd.testing.assert_index_equal(result.index, pd.RangeIndex(12))
            # Row order is inconsistent with pandas when the input shape is
            # unknown, so sort by the first column before comparing values.
            result = result.sort_values(by=result.columns[0])
            expected = (left_raw + right_raw).reset_index()
            np.testing.assert_array_equal(result.to_numpy(), expected.to_numpy())

        left_raw = pd.DataFrame(np.random.rand(10, 3), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        left = from_pandas_df(left_raw, chunk_size=5)
        right_raw = pd.DataFrame(np.random.rand(10, 3), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        right = from_pandas_df(right_raw, chunk_size=6)
        check_sum_reset(left + right, left_raw, right_raw)

        left_raw = pd.Series(np.random.rand(10,), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        left = from_pandas_series(left_raw, chunk_size=3)
        right_raw = pd.Series(np.random.rand(10,), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        right = from_pandas_series(right_raw, chunk_size=3)
        check_sum_reset(left + right, left_raw, right_raw)

    def testSeriesMapExecution(self):
        """Check ``Series.map`` with dict, callable and series arguments against pandas.

        Covers explicitly given and inferred result dtypes, ``na_action``,
        and a string-valued series.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        series_eq = pd.testing.assert_series_equal

        raw = pd.Series(np.arange(10))
        s = from_pandas_series(raw, chunk_size=7)

        # The dtype inferred for this mapping is int, but the real result is
        # float because unmapped entries become nan, so inference must fail.
        with self.assertRaises(ValueError):
            s.map({5: 10})

        # dict mappings: (mapping, extra kwargs for Mars only, kwargs for both)
        dict_cases = [
            ({5: 10}, {'dtype': float}, {}),
            ({i: 10 + i for i in range(7)}, {'dtype': float}, {}),
            ({5: 10}, {'dtype': float}, {'na_action': 'ignore'}),
            # float values: dtype can be inferred
            ({5: 10.}, {}, {}),
        ]
        for mapping, mars_only, shared in dict_cases:
            series_eq(execute(s.map(mapping, **mars_only, **shared)),
                      raw.map(mapping, **shared))

        # callable with an explicit dtype
        series_eq(execute(s.map(lambda x: x + 1, dtype=int)),
                  raw.map(lambda x: x + 1))

        def f(x: int) -> float:
            return x + 1.

        # annotated callable: dtype can be inferred for the function
        series_eq(execute(s.map(f)), raw.map(lambda x: x + 1.))

        # argument is a md.Series, dtype given explicitly
        mapper_raw = pd.Series([10], index=[5])
        mapper = from_pandas_series(mapper_raw)
        series_eq(execute(s.map(mapper, dtype=float)), raw.map(mapper_raw))

        # argument is a md.Series, and dtype can be inferred
        mapper_raw = pd.Series([10.], index=[5])
        mapper = from_pandas_series(mapper_raw)
        series_eq(execute(s.map(mapper)), raw.map(mapper_raw))

        # string data
        raw = pd.Series(['a', 'b', 'c', 'd'])
        s = from_pandas_series(raw, chunk_size=2)
        series_eq(execute(s.map({'c': 'e'})), raw.map({'c': 'e'}))

    def testDescribeExecution(self):
        """Check ``describe`` on Series and DataFrame against pandas.

        Both single-chunk and multi-chunk inputs are executed, with default
        arguments and with ``percentiles``/``include`` given. A percentile
        outside the valid range must raise ``ValueError``.
        """
        s_raw = pd.Series(np.random.rand(10))

        # test one chunk
        series = from_pandas_series(s_raw, chunk_size=10)

        r = series.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe()
        pd.testing.assert_series_equal(result, expected)

        r = series.describe(percentiles=[])
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[])
        pd.testing.assert_series_equal(result, expected)

        # test multi chunks
        series = from_pandas_series(s_raw, chunk_size=3)

        r = series.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe()
        pd.testing.assert_series_equal(result, expected)

        r = series.describe(percentiles=[])
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[])
        pd.testing.assert_series_equal(result, expected)

        # ``include`` is accepted for a Series too (pandas ignores it there)
        r = series.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_series_equal(result, expected)

        # four float columns plus one integer column, so ``include`` has
        # something to filter out
        df_raw = pd.DataFrame(np.random.rand(10, 4), columns=list('abcd'))
        df_raw['e'] = np.random.randint(100, size=10)

        # test one chunk
        df = from_pandas_df(df_raw, chunk_size=10)

        r = df.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe()
        pd.testing.assert_frame_equal(result, expected)

        # Exercise the single-chunk DataFrame with percentiles/include,
        # mirroring the multi-chunk check further down.
        r = df.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_frame_equal(result, expected)

        # test multi chunks
        df = from_pandas_df(df_raw, chunk_size=3)

        r = df.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe()
        pd.testing.assert_frame_equal(result, expected)

        r = df.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_frame_equal(result, expected)

        # percentiles must lie within [0, 1]
        with self.assertRaises(ValueError):
            df.describe(percentiles=[1.1])

    def testDataFrameFillNAExecution(self):
        """Check ``DataFrame.fillna``/``ffill``/``bfill`` against pandas.

        The input is an all-nan 20x10 frame with up to 20 randomly placed
        values. Fill values are a scalar, a DataFrame or a Series; both
        single-chunk and chunked inputs are executed, including the in-place
        variant.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        frame_eq = pd.testing.assert_frame_equal

        df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(20):
            df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)

        fill_frame_raw = pd.DataFrame(np.random.randint(0, 100, (10, 7)).astype(np.float32),
                                      columns=list('ABCDEFG'))

        # single chunk, scalar fill value
        frame_eq(execute(from_pandas_df(df_raw).fillna(1)), df_raw.fillna(1))

        # single chunk, fill value is itself a single-chunk DataFrame
        mars_df = from_pandas_df(df_raw)
        fill_frame = from_pandas_df(fill_frame_raw)
        frame_eq(execute(mars_df.fillna(fill_frame)), df_raw.fillna(fill_frame_raw))

        # chunked input, scalar fill value
        frame_eq(execute(from_pandas_df(df_raw, chunk_size=3).fillna(1)), df_raw.fillna(1))

        # chunked input, filled in place: the input object itself is executed
        mars_df = from_pandas_df(df_raw, chunk_size=3)
        mars_df.fillna(1, inplace=True)
        frame_eq(execute(mars_df), df_raw.fillna(1))

        # forward/backward fill without limit, along axis 0 and then axis 1;
        # each callable is applied to the Mars frame and to the pandas frame
        directional_fills = (
            lambda d: d.fillna(method='pad'),
            lambda d: d.fillna(method='backfill'),
            lambda d: d.ffill(axis=1),
            lambda d: d.bfill(axis=1),
        )
        for fill in directional_fills:
            mars_df = from_pandas_df(df_raw, chunk_size=3)
            frame_eq(execute(fill(mars_df)), fill(df_raw))

        # chunked input filled from a chunked DataFrame
        mars_df = from_pandas_df(df_raw, chunk_size=3)
        fill_frame = from_pandas_df(fill_frame_raw, chunk_size=4)
        frame_eq(execute(mars_df.fillna(fill_frame)), df_raw.fillna(fill_frame_raw))

        # chunked input filled from a chunked Series indexed by column label
        fill_series_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32),
                                    index=list('ABCDEFGHIJ'))
        mars_df = from_pandas_df(df_raw, chunk_size=3)
        fill_series = from_pandas_series(fill_series_raw, chunk_size=4)
        frame_eq(execute(mars_df.fillna(fill_series)), df_raw.fillna(fill_series_raw))

    def testSeriesFillNAExecution(self):
        """Check ``Series.fillna`` against pandas.

        The input is an all-nan series of 20 entries with up to 3 randomly
        placed values. Fill values are a scalar or another series; both
        single-chunk and chunked inputs are executed, including the in-place
        variant and the 'pad'/'backfill' methods.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        series_eq = pd.testing.assert_series_equal

        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)
        fill_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32))

        # single chunk, scalar fill value
        series_eq(execute(from_pandas_series(series_raw).fillna(1)), series_raw.fillna(1))

        # single chunk, fill value is itself a single-chunk Series
        mars_series = from_pandas_series(series_raw)
        fill_series = from_pandas_series(fill_raw)
        series_eq(execute(mars_series.fillna(fill_series)), series_raw.fillna(fill_raw))

        # chunked input, scalar fill value
        series_eq(execute(from_pandas_series(series_raw, chunk_size=3).fillna(1)),
                  series_raw.fillna(1))

        # chunked input, filled in place: the input object itself is executed
        mars_series = from_pandas_series(series_raw, chunk_size=3)
        mars_series.fillna(1, inplace=True)
        series_eq(execute(mars_series), series_raw.fillna(1))

        # forward fill, then backward fill, both without limit
        for method in ('pad', 'backfill'):
            mars_series = from_pandas_series(series_raw, chunk_size=3)
            series_eq(execute(mars_series.fillna(method=method)),
                      series_raw.fillna(method=method))

        # chunked input filled from a chunked Series
        mars_series = from_pandas_series(series_raw, chunk_size=3)
        fill_series = from_pandas_series(fill_raw, chunk_size=4)
        series_eq(execute(mars_series.fillna(fill_series)), series_raw.fillna(fill_raw))

    def testDataFrameApplyExecute(self):
        """Check ``DataFrame.apply`` against pandas for several function kinds.

        Runs with ``options.chunk_store_limit`` temporarily set to 20 and
        restores the previous value afterwards, even if an assertion fails.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        frame_eq = pd.testing.assert_frame_equal
        series_eq = pd.testing.assert_series_equal

        df_raw = pd.DataFrame({chr(ord('A') + i): [v ** 2 for v in range(20)]
                               for i in range(10)})

        saved_limit = options.chunk_store_limit
        try:
            options.chunk_store_limit = 20

            df = from_pandas_df(df_raw, chunk_size=5)

            # (function, apply kwargs, assertion matching the result kind)
            cases = [
                ('ffill', {}, frame_eq),
                (['sum', 'max'], {}, frame_eq),
                (np.sqrt, {}, frame_eq),
                (lambda x: pd.Series([1, 2]), {}, frame_eq),
                (np.sum, {'axis': 'index'}, series_eq),
                (np.sum, {'axis': 'columns'}, series_eq),
                (lambda x: [1, 2], {'axis': 1}, series_eq),
                (lambda x: pd.Series([1, 2], index=['foo', 'bar']), {'axis': 1}, frame_eq),
                (lambda x: [1, 2], {'axis': 1, 'result_type': 'expand'}, frame_eq),
                (lambda x: list(range(10)), {'axis': 1, 'result_type': 'reduce'}, series_eq),
                (lambda x: list(range(10)), {'axis': 1, 'result_type': 'broadcast'}, frame_eq),
            ]
            for func, kwargs, assert_equal in cases:
                assert_equal(execute(df.apply(func, **kwargs)),
                             df_raw.apply(func, **kwargs))
        finally:
            options.chunk_store_limit = saved_limit

    def testSeriesApplyExecute(self):
        """Check ``Series.apply`` against pandas for names, lists and callables."""
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        labels = [chr(ord('A') + i) for i in range(20)]
        s_raw = pd.Series([v ** 2 for v in range(20)], index=labels)

        series = from_pandas_series(s_raw, chunk_size=5)

        # (function, apply kwargs); every case yields a series
        cases = [
            ('add', {'args': (1,)}),
            (['sum', 'max'], {}),
            (np.sqrt, {}),
            ('sqrt', {}),
            (lambda x: [x, x + 1], {'convert_dtype': False}),
        ]
        for func, kwargs in cases:
            pd.testing.assert_series_equal(execute(series.apply(func, **kwargs)),
                                           s_raw.apply(func, **kwargs))

    def testTransformExecute(self):
        """Check ``transform`` on DataFrame and Series against pandas.

        DataFrame cases cover plain transforms and the ``_call_agg=True``
        mode, whose results are compared with ``DataFrame.agg``. Runs with
        ``options.chunk_store_limit`` temporarily set to 20 and restores the
        previous value afterwards.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        def rename_fn(f, new_name):
            # give an anonymous function an explicit ``__name__``
            f.__name__ = new_name
            return f

        frame_eq = pd.testing.assert_frame_equal
        series_eq = pd.testing.assert_series_equal

        df_raw = pd.DataFrame({chr(ord('A') + i): [v ** 2 for v in range(20)]
                               for i in range(10)})
        s_raw = pd.Series([v ** 2 for v in range(20)],
                          index=[chr(ord('A') + i) for i in range(20)])

        saved_limit = options.chunk_store_limit
        try:
            options.chunk_store_limit = 20

            # DATAFRAME CASES
            df = from_pandas_df(df_raw, chunk_size=5)

            # transform scenarios on data frames: (function, kwargs)
            transform_cases = [
                (lambda x: list(range(len(x))), {}),
                (lambda x: list(range(len(x))), {'axis': 1}),
                (['cumsum', 'cummax', lambda x: x + 1], {}),
                (OrderedDict([
                    ('A', 'cumsum'),
                    ('D', ['cumsum', 'cummax']),
                    ('F', lambda x: x + 1),
                ]), {}),
            ]
            for func, kwargs in transform_cases:
                frame_eq(execute(df.transform(func, **kwargs)),
                         df_raw.transform(func, **kwargs))

            # agg scenarios on data frames: (function, kwargs, assertion)
            agg_cases = [
                (lambda x: x.iloc[:-1], {}, frame_eq),
                (lambda x: x.iloc[:-1], {'axis': 1}, frame_eq),
                ([rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                  lambda x: x.iloc[:-1].reset_index(drop=True)], {}, frame_eq),
                (lambda x: x.sum(), {}, series_eq),
                (OrderedDict([
                    ('A', rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1')),
                    ('D', [rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                           lambda x: x.iloc[:-1].reset_index(drop=True)]),
                    ('F', lambda x: x.iloc[:-1].reset_index(drop=True)),
                ]), {}, frame_eq),
            ]
            for func, kwargs, assert_equal in agg_cases:
                assert_equal(execute(df.transform(func, _call_agg=True, **kwargs)),
                             df_raw.agg(func, **kwargs))

            # SERIES CASES
            series = from_pandas_series(s_raw, chunk_size=5)

            # a single function yields a series, a list of functions a frame
            series_eq(execute(series.transform(lambda x: x + 1)),
                      s_raw.transform(lambda x: x + 1))

            frame_eq(execute(series.transform(['cumsum', lambda x: x + 1])),
                     s_raw.transform(['cumsum', lambda x: x + 1]))
        finally:
            options.chunk_store_limit = saved_limit

    def testStringMethodExecution(self):
        """Check the ``.str`` accessor of a chunked series against pandas.

        Each operation is written once as a callable and applied to both the
        Mars series and the pandas series it was built from.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        frame_eq = pd.testing.assert_frame_equal
        series_eq = pd.testing.assert_series_equal

        s = pd.Series(['s1,s2', 'ef,', 'dd', np.nan])
        s2 = pd.concat([s, s, s])

        series = from_pandas_series(s, chunk_size=2)
        series2 = from_pandas_series(s2, chunk_size=2)

        # (Mars input, pandas input, operation, assertion for the result kind)
        cases = [
            # getitem
            (series, s, lambda x: x.str[:3], series_eq),
            # split, expand=False
            (series, s, lambda x: x.str.split(',', n=2), series_eq),
            # split, expand=True
            (series, s, lambda x: x.str.split(',', expand=True, n=1), frame_eq),
            # rsplit
            (series, s, lambda x: x.str.rsplit(',', expand=True, n=1), frame_eq),
            # cat over all data: the result is a single string
            (series2, s2, lambda x: x.str.cat(sep='/', na_rep='e'), self.assertEqual),
            # cat with a list
            (series, s, lambda x: x.str.cat(['a', 'b', np.nan, 'c']), series_eq),
            # cat with another series
            (series, s, lambda x: x.str.cat(x.str.capitalize(), join='outer'), series_eq),
            # extractall
            (series, s, lambda x: x.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)"), frame_eq),
            # extract, expand=False
            (series, s, lambda x: x.str.extract(r'[ab](\d)', expand=False), series_eq),
            # extract, expand=True
            (series, s, lambda x: x.str.extract(r'[ab](\d)', expand=True), frame_eq),
        ]
        for mars_input, pandas_input, operation, assert_equal in cases:
            assert_equal(execute(operation(mars_input)), operation(pandas_input))

    def testDatetimeMethodExecution(self):
        """Check the ``.dt`` accessor on timestamp and timedelta series against pandas.

        Both inputs contain a missing value and are split into chunks of two.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        series_eq = pd.testing.assert_series_equal

        # timestamps: a property and a method of the accessor
        stamps = pd.Series([pd.Timestamp('2020-1-1'),
                            pd.Timestamp('2020-2-1'),
                            np.nan])
        mars_stamps = from_pandas_series(stamps, chunk_size=2)

        series_eq(execute(mars_stamps.dt.year), stamps.dt.year)
        series_eq(execute(mars_stamps.dt.strftime('%m-%d-%Y')),
                  stamps.dt.strftime('%m-%d-%Y'))

        # timedeltas
        deltas = pd.Series([pd.Timedelta('1 days'),
                            pd.Timedelta('3 days'),
                            np.nan])
        mars_deltas = from_pandas_series(deltas, chunk_size=2)

        series_eq(execute(mars_deltas.dt.days), deltas.dt.days)

    def testSeriesIsin(self):
        """Check ``Series.isin`` against pandas for several kinds of ``values``.

        ``values`` is given as a Mars series (with different chunk layouts on
        both sides), a pandas series, a Mars tensor and a Python set.
        """
        def execute(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])

        # values is a Mars series; (chunk size of a, chunk size of b):
        # one chunk vs multiple, multiple vs one, multiple vs multiple
        for a_chunk, b_chunk in ((10, 2), (2, 4), (2, 2)):
            sa = from_pandas_series(a, chunk_size=a_chunk)
            sb = from_pandas_series(b, chunk_size=b_chunk)
            pd.testing.assert_series_equal(execute(sa.isin(sb)), a.isin(b))

        # values is a raw pandas series
        sa = from_pandas_series(a, chunk_size=2)
        pd.testing.assert_series_equal(execute(sa.isin(b)), a.isin(b))

        # values is a chunked tensor built from an ndarray
        b_array = np.array([2, 1, 9, 3])
        sa = from_pandas_series(a, chunk_size=2)
        sb = tensor(b_array, chunk_size=3)
        pd.testing.assert_series_equal(execute(sa.isin(sb)), a.isin(b_array))

        # values is a set
        b_set = {2, 1, 9, 3}
        sa = from_pandas_series(a, chunk_size=2)
        pd.testing.assert_series_equal(execute(sa.isin(b_set)), a.isin(b_set))

    def testCheckNA(self):
        """Compare ``isna``/``notna`` of chunked frames and series with pandas.

        The inputs start as all-NaN and get a few randomly placed values.
        """
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        frame_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(20):
            # value first, then position: same draw order as a one-line
            # ``frame.iloc[row, col] = value`` assignment
            value = random.randint(0, 99)
            row = random.randint(0, 19)
            col = random.randint(0, 9)
            frame_raw.iloc[row, col] = value

        frame = from_pandas_df(frame_raw, chunk_size=4)

        actual = run(frame.isna())
        pd.testing.assert_frame_equal(actual, frame_raw.isna())
        actual = run(frame.notna())
        pd.testing.assert_frame_equal(actual, frame_raw.notna())

        column_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            value = random.randint(0, 99)
            position = random.randint(0, 19)
            column_raw.iloc[position] = value

        column = from_pandas_series(column_raw, chunk_size=4)

        actual = run(column.isna())
        pd.testing.assert_series_equal(actual, column_raw.isna())
        actual = run(column.notna())
        pd.testing.assert_series_equal(actual, column_raw.notna())

    def testDropNA(self):
        """Compare ``dropna`` of chunked frames and series with pandas.

        Covers the default call, ``how='all'``, ``subset`` and ``inplace``
        on randomly filled, mostly-NaN inputs.
        """
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # dataframe cases
        frame_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(30):
            # value first, then position: same draw order as a one-line
            # ``frame.iloc[row, col] = value`` assignment
            value = random.randint(0, 99)
            row = random.randint(0, 19)
            col = random.randint(0, 9)
            frame_raw.iloc[row, col] = value
        # fill between one and five randomly chosen rows completely
        for _ in range(random.randint(1, 5)):
            full_row = random.randint(0, 19)
            for col in range(0, 10):
                frame_raw.iloc[full_row, col] = random.randint(0, 99)

        frame_cases = [
            # only one chunk in columns, can run dropna directly
            ((4, 10), {}),
            # multiple chunks in columns, count() will be called first
            (4, {}),
            (4, {'how': 'all'}),
            (4, {'subset': list('ABFI')}),
            (4, {'how': 'all', 'subset': list('BDHJ')}),
        ]
        for chunk_size, kwargs in frame_cases:
            dropped = from_pandas_df(frame_raw, chunk_size=chunk_size).dropna(**kwargs)
            pd.testing.assert_frame_equal(run(dropped), frame_raw.dropna(**kwargs))

        # in-place variant mutates the chunked frame itself
        frame = from_pandas_df(frame_raw, chunk_size=4)
        frame.dropna(how='all', inplace=True)
        pd.testing.assert_frame_equal(run(frame), frame_raw.dropna(how='all'))

        # series cases
        column_raw = pd.Series(np.nan, index=range(20))
        for _ in range(10):
            value = random.randint(0, 99)
            position = random.randint(0, 19)
            column_raw.iloc[position] = value

        dropped = from_pandas_series(column_raw, chunk_size=4).dropna()
        pd.testing.assert_series_equal(run(dropped), column_raw.dropna())

        column = from_pandas_series(column_raw, chunk_size=4)
        column.dropna(inplace=True)
        pd.testing.assert_series_equal(run(column), column_raw.dropna())

    def testCutExecution(self):
        """Compare ``cut`` with ``pd.cut`` across input kinds and options.

        Inputs are a chunked series, a chunked tensor, a pandas series and
        a raw ndarray.  Options covered: explicit bin edges, ``retbins``,
        ``labels`` (list, tensor and ``False``), interval-index bins,
        duplicate edges and, inside a test context, integer bin counts
        (including constant input and input containing infinity).
        """
        rs = np.random.RandomState(0)
        raw = rs.random(15) * 1000
        s = pd.Series(raw, index=['i{}'.format(i) for i in range(15)])
        bins = [10, 100, 500]
        ii = pd.interval_range(10, 500, 3)
        labels = ['a', 'b']

        t = tensor(raw, chunk_size=4)
        series = from_pandas_series(s, chunk_size=4)
        iii = from_pandas_index(ii, chunk_size=2)

        # cut on Series
        r = cut(series, bins)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins))

        r, b = cut(series, bins, retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        # cut on tensor
        r = cut(t, bins)
        # result and expected are arrays whose dtype is CategoricalDtype,
        # so they are compared element by element
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        # pandas Series input, bins given as a chunked tensor
        r = cut(s, tensor(bins, chunk_size=2), right=False, include_lowest=True)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins, right=False, include_lowest=True))

        # test labels
        r = cut(t, bins, labels=labels)
        # result and expected are arrays whose dtype is CategoricalDtype
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        r = cut(t, bins, labels=False)
        # with labels=False pd.cut returns bin indicators rather than a
        # categorical, so a plain array comparison is used
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=False)
        np.testing.assert_array_equal(result, expected)

        # test labels which is tensor
        labels_t = tensor(['a', 'b'], chunk_size=1)
        r = cut(raw, bins, labels=labels_t, include_lowest=True)
        # result and expected are arrays whose dtype is CategoricalDtype
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels, include_lowest=True)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        # test labels=False together with bins given as a pandas IntervalIndex
        r, b = cut(raw, ii, labels=False, retbins=True)
        r_result = self.executor.execute_tileable(r, concat=True)[0]
        b_result = self.executor.execute_tileable(b, concat=True)[0]
        r_expected, b_expected = pd.cut(raw, ii, labels=False, retbins=True)
        # zip() stops at the shorter input, so pin the length first
        self.assertEqual(len(r_result), len(r_expected))
        for actual, wanted in zip(r_result, r_expected):
            np.testing.assert_equal(actual, wanted)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test bins which is md.IntervalIndex
        r, b = cut(series, iii, labels=tensor(labels, chunk_size=1), retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_dataframe(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, ii, labels=labels, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test duplicates
        bins2 = [0, 2, 4, 6, 10, 10]
        r, b = cut(s, bins2, labels=False, retbins=True,
                   right=False, duplicates='drop')
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins2, labels=False, retbins=True,
                                        right=False, duplicates='drop')
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            # test integer bins
            r = cut(series, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s, 3))

            r, b = cut(series, 3, right=False, retbins=True)
            r_result, b_result = executor.execute_dataframes([r, b])
            r_expected, b_expected = pd.cut(s, 3, right=False, retbins=True)
            pd.testing.assert_series_equal(r_result, r_expected)
            np.testing.assert_array_equal(b_result, b_expected)

            # test min max same
            s2 = pd.Series([1.1] * 15)
            r = cut(s2, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s2, 3))

            # test inf exist
            s3 = s2.copy()
            # s3 has the default integer index, so ``s3[-1] = ...`` is a
            # label assignment that appends a row labelled -1; positional
            # access overwrites the last element as intended
            s3.iloc[-1] = np.inf
            with self.assertRaises(ValueError):
                executor.execute_dataframes([cut(s3, 3)])

    def testShiftExecution(self):
        """Compare ``shift``/``tshift`` of chunked frames and series with pandas.

        Every combination of periods, axis and fill value is executed; a
        failing comparison is re-raised with the offending parameters in
        the message.
        """
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        fill_values = (None, 0, 1.)

        # test dataframe
        rs = np.random.RandomState(0)
        raw = pd.DataFrame(rs.randint(1000, size=(10, 8)),
                           columns=['col' + str(i + 1) for i in range(8)])
        df = from_pandas_df(raw, chunk_size=5)

        frame_cases = [(periods, axis, fill_value)
                       for periods in (2, -2, 6, -6)
                       for axis in (0, 1)
                       for fill_value in fill_values]
        for periods, axis, fill_value in frame_cases:
            kwargs = dict(periods=periods, axis=axis, fill_value=fill_value)
            shifted = df.shift(**kwargs)

            try:
                actual = run(shifted)
                pd.testing.assert_frame_equal(actual, raw.shift(**kwargs))
            except AssertionError as e:  # pragma: no cover
                raise AssertionError(
                    'Failed when periods: {}, axis: {}, fill_value: {}'.format(
                        periods, axis, fill_value
                    )) from e

        # same data with datetime labels on both axes
        raw2 = raw.copy()
        raw2.index = pd.date_range('2020-1-1', periods=10)
        raw2.columns = pd.date_range('2020-3-1', periods=8)
        df2 = from_pandas_df(raw2, chunk_size=5)

        # test freq not None
        freq_frame_cases = [(periods, axis, fill_value)
                            for periods in (2, -2)
                            for axis in (0, 1)
                            for fill_value in fill_values]
        for periods, axis, fill_value in freq_frame_cases:
            kwargs = dict(periods=periods, freq='D', axis=axis,
                          fill_value=fill_value)
            shifted = df2.shift(**kwargs)

            try:
                actual = run(shifted)
                pd.testing.assert_frame_equal(actual, raw2.shift(**kwargs))
            except AssertionError as e:  # pragma: no cover
                raise AssertionError(
                    'Failed when periods: {}, axis: {}, fill_value: {}'.format(
                        periods, axis, fill_value
                    )) from e

        # test tshift
        actual = run(df2.tshift(periods=1))
        pd.testing.assert_frame_equal(actual, raw2.tshift(periods=1))

        # tshift on the frame without datetime labels is expected to fail
        with self.assertRaises(ValueError):
            _ = df.tshift(periods=1)

        # test series
        s = raw.iloc[:, 0]
        series = from_pandas_series(s, chunk_size=5)

        series_cases = [(periods, fill_value)
                        for periods in (0, 2, -2, 6, -6)
                        for fill_value in fill_values]
        for periods, fill_value in series_cases:
            kwargs = dict(periods=periods, fill_value=fill_value)
            shifted = series.shift(**kwargs)

            try:
                actual = run(shifted)
                pd.testing.assert_series_equal(actual, s.shift(**kwargs))
            except AssertionError as e:  # pragma: no cover
                raise AssertionError(
                    'Failed when periods: {}, fill_value: {}'.format(
                        periods, fill_value
                    )) from e

        s2 = raw2.iloc[:, 0]

        # test freq not None
        series2 = from_pandas_series(s2, chunk_size=5)
        freq_series_cases = [(periods, fill_value)
                             for periods in (2, -2)
                             for fill_value in fill_values]
        for periods, fill_value in freq_series_cases:
            kwargs = dict(periods=periods, freq='D', fill_value=fill_value)
            shifted = series2.shift(**kwargs)

            try:
                actual = run(shifted)
                pd.testing.assert_series_equal(actual, s2.shift(**kwargs))
            except AssertionError as e:  # pragma: no cover
                raise AssertionError(
                    'Failed when periods: {}, fill_value: {}'.format(
                        periods, fill_value
                    )) from e
Ejemplo n.º 4
0
class Test(unittest.TestCase):
    """Execution tests for the non-negativity and finiteness checks."""

    def setUp(self) -> None:
        # unittest calls this before each test method
        self.executor = ExecutorForTest('numpy')

    def testCheckNonNegativeThenReturnValueExecution(self):
        """Run ``check_non_negative_then_return_value`` on three input kinds.

        For a dense tensor, a sparse tensor and a DataFrame, the value is
        returned unchanged when the input has no negative entry, and
        execution raises ``ValueError`` once one entry is set to -1.
        """
        def checked(tileable):
            # the same tileable is both the checked input and the returned value
            return check_non_negative_then_return_value(tileable, tileable, 'sth')

        def run(tileable):
            return self.executor.execute_tileable(tileable, concat=True)[0]

        # dense tensor
        dense = np.random.randint(10, size=(10, 5))
        op = checked(mt.tensor(dense, chunk_size=(3, 2)))
        np.testing.assert_array_equal(run(op), dense)

        dense = dense.copy()
        dense[1, 3] = -1
        op = checked(mt.tensor(dense, chunk_size=(3, 2)))
        with self.assertRaises(ValueError):
            _ = run(op)

        # sparse tensor
        sparse = sps.random(10, 5, density=.3, format='csr')
        op = checked(mt.tensor(sparse, chunk_size=(3, 2)))
        actual = run(op)
        np.testing.assert_array_equal(actual.toarray(), sparse.A)

        sparse = sparse.copy()
        sparse[1, 3] = -1
        op = checked(mt.tensor(sparse, chunk_size=(3, 2)))
        with self.assertRaises(ValueError):
            _ = run(op)

        # DataFrame
        frame = pd.DataFrame(np.random.rand(10, 4))
        op = checked(md.DataFrame(frame, chunk_size=(3, 2)))
        actual = run(op)
        pd.testing.assert_frame_equal(actual, frame)

        frame = frame.copy()
        frame.iloc[1, 3] = -1
        op = checked(md.DataFrame(frame, chunk_size=(3, 2)))
        with self.assertRaises(ValueError):
            _ = run(op)

    def testAssertAllFinite(self):
        """Run ``assert_all_finite`` on finite, infinite, NaN, object and sparse input."""
        def run(tileable):
            return self.executor.execute_tensor(tileable, concat=True)[0]

        # infinity is rejected
        data = np.array([2.3, np.inf], dtype=np.float64)
        x = mt.tensor(data)
        with self.assertRaises(ValueError):
            check = assert_all_finite(x)
            _ = self.executor.execute_tensor(check)

        # NaN is rejected when allow_nan=False
        data = np.array([2.3, np.nan], dtype=np.float64)
        x = mt.tensor(data)
        with self.assertRaises(ValueError):
            check = assert_all_finite(x, allow_nan=False)
            _ = self.executor.execute_tensor(check)

        # every element is finite although the float32 sum is not
        largest = np.finfo(np.float32).max
        data = [largest] * 2
        self.assertFalse(np.isfinite(np.sum(data)))
        x = mt.tensor(data)
        outcome = run(assert_all_finite(x))
        self.assertTrue(outcome.item())

        # object array holding NaN is rejected
        data = np.array([np.nan, 'a'], dtype=object)
        x = mt.tensor(data)
        with self.assertRaises(ValueError):
            check = assert_all_finite(x)
            _ = self.executor.execute_tensor(check)

        # finite multi-chunk input
        data = np.random.rand(10)
        x = mt.tensor(data, chunk_size=2)

        # check_only=False hands the data back
        outcome = run(assert_all_finite(x, check_only=False))
        np.testing.assert_array_equal(outcome, data)

        # the default returns a truthy scalar
        outcome = run(assert_all_finite(x))
        self.assertTrue(outcome.item())

        # with assume_finite enabled the call short-circuits
        with option_context() as opts:
            opts.learn.assume_finite = True

            self.assertIsNone(assert_all_finite(x))
            self.assertIs(assert_all_finite(x, check_only=False), x)

        # test sparse
        sparse = sps.random(10,
                            3,
                            density=0.1,
                            format='csr',
                            random_state=np.random.RandomState(0))
        sparse[0, 2] = np.nan

        with self.assertRaises(ValueError):
            check = assert_all_finite(sparse)
            _ = self.executor.execute_tensor(check)
Ejemplo n.º 5
0
class Test(unittest.TestCase):
    def setUp(self) -> None:
        """Create the ``ExecutorForTest('numpy')`` instance used by the tests."""
        self.executor = ExecutorForTest('numpy')

    def testManualBuildFaissIndex(self):
        """Build faiss indexes with explicit index strings and inspect them.

        Covers brute-force ('Flat') search for each return type, trained
        IVF indexes with and without ``same_distribution``, single-chunk
        input, and rejection of unknown index names and metrics.
        """
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        # NOTE(review): the queries use the same seed as the training data;
        # confirm this is intended
        query = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

        knn = NearestNeighbors(algorithm='kd_tree')
        knn.fit(train)
        _, expected_indices = knn.kneighbors(query, 5)

        for return_type in ['object', 'filename', 'bytes']:
            # test brute-force search
            X = mt.tensor(train, chunk_size=10)
            index_t = build_faiss_index(X, 'Flat', None, random_state=0,
                                        same_distribution=True,
                                        return_index_type=return_type)
            built = self.executor.execute_tileable(index_t)

            # gather the executed results into one sharded index
            sharded = faiss.IndexShards(dim)
            for item in built:
                loaded = _load_index(None, index_t.op, item, -1)
                sharded.add_shard(loaded)
            built = sharded

            # NOTE(review): attribute is spelled ``nprob`` here while the
            # faiss_query calls in this file pass ``nprobe`` - confirm
            built.nprob = 10
            _, found = built.search(query, k=5)

            np.testing.assert_array_equal(found, expected_indices.fetch())

        # test one chunk, brute force
        X = mt.tensor(train, chunk_size=50)
        index_t = build_faiss_index(X, 'Flat', None, random_state=0,
                                    same_distribution=True,
                                    return_index_type='object')
        built = self.executor.execute_tileable(index_t)[0]

        built.nprob = 10
        _, found = built.search(query, k=5)

        np.testing.assert_array_equal(found, expected_indices.fetch())

        # test train, same distribution
        X = mt.tensor(train, chunk_size=10)
        index_t = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                    same_distribution=True,
                                    return_index_type='object')
        built = self.executor.execute_tileable(index_t)[0]

        self.assertIsInstance(built, faiss.IndexIVFFlat)
        self.assertEqual(built.ntotal, n_train)
        self.assertEqual(len(get_tiled(index_t).chunks), 1)

        # test train, distributions are variant
        X = mt.tensor(train, chunk_size=10)
        index_t = build_faiss_index(X, 'IVF10,Flat', None, random_state=0,
                                    same_distribution=False,
                                    return_index_type='object')
        built = self.executor.execute_tileable(index_t)

        # 50 rows in chunks of 10: five results holding ten vectors each
        self.assertEqual(len(built), 5)
        for item in built:
            self.assertIsInstance(item, faiss.IndexIVFFlat)
            self.assertEqual(item.ntotal, 10)

        # test one chunk, train
        X = mt.tensor(train, chunk_size=50)
        index_t = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                    same_distribution=True,
                                    return_index_type='object')
        built = self.executor.execute_tileable(index_t)[0]

        self.assertIsInstance(built, faiss.IndexIVFFlat)
        self.assertEqual(built.ntotal, n_train)

        # test wrong index
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'unknown_index', None)

        # test unknown metric
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'Flat', None, metric='unknown_metric')

    def testFaissQuery(self):
        """Compare ``faiss_query`` on a 'Flat' index with ``NearestNeighbors``.

        Runs for multi-chunk and single-chunk inputs and for the 'l2' and
        'cosine' metrics, checking both indices and distances.
        """
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        query = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        tensor_pairs = [
            # multi chunks
            (mt.tensor(train, chunk_size=(20, 5)), mt.tensor(query, chunk_size=5)),
            # one chunk
            (mt.tensor(train, chunk_size=50), mt.tensor(query, chunk_size=10))
        ]

        for X, Y in tensor_pairs:
            for metric in ['l2', 'cosine']:
                index_t = build_faiss_index(X, 'Flat', None, metric=metric,
                                            random_state=0, return_index_type='object')
                distance_t, indices_t = faiss_query(index_t, Y, 5, nprobe=10)
                distance, indices = self.executor.execute_tensors([distance_t, indices_t])

                # reference answer computed from the raw arrays
                knn = NearestNeighbors(metric=metric)
                knn.fit(train)
                expected_distance, expected_indices = knn.kneighbors(query, 5)

                np.testing.assert_array_equal(indices, expected_indices.fetch())
                np.testing.assert_almost_equal(distance, expected_distance.fetch())

    def testGenIndexStringAndSampleCount(self):
        """Check the generated index string and sample count per setting.

        Each expected index string (except 'Flat') is also passed to
        ``faiss.index_factory`` to check whether the index needs training.
        Invalid keyword arguments must raise ``ValueError``.
        """
        d = 32

        # (row count, accuracy, memory_require, expected result, is_trained)
        # is_trained is None when no index is built for the case
        cases = [
            # accuracy=True, could be Flat only
            (10 ** 9, True, 'minimum', ('Flat', None), None),
            # no memory concern
            (10 ** 5, False, 'maximum', ('HNSW32', None), True),
            # memory concern not much
            (10 ** 5, False, 'high', ('IVF1580,Flat', 47400), False),
            # memory quite important
            (5 * 10 ** 6, False, 'low',
             ('PCAR16,IVF65536_HNSW32,SQ8', 32 * 65536), False),
            # memory very important
            (10 ** 8, False, 'minimum',
             ('OPQ16_32,IVF1048576_HNSW32,PQ16', 64 * 65536), False),
            (10 ** 10, False, 'low',
             ('PCAR16,IVF1048576_HNSW32,SQ8', 64 * 65536), False),
        ]
        for n_rows, accuracy, memory_require, expected, trained in cases:
            ret = _gen_index_string_and_sample_count(
                (n_rows, d), None, accuracy, memory_require)
            self.assertEqual(ret, expected)
            if trained is None:
                continue
            index = faiss.index_factory(d, ret[0])
            if trained:
                self.assertTrue(index.is_trained)
            else:
                self.assertFalse(index.is_trained)

        invalid_cases = [
            # M > 64 raise error
            ('maximum', {'M': 128}),
            # M > 64
            ('minimum', {'M': 128}),
            # dim should be multiple of M
            ('minimum', {'M': 16, 'dim': 17}),
            ('low', {'k': 5}),
        ]
        for memory_require, extra in invalid_cases:
            with self.assertRaises(ValueError):
                _gen_index_string_and_sample_count(
                    (10 ** 5, d), None, False, memory_require, **extra)

    def testAutoIndex(self):
        """Query an index built without an explicit index string.

        For single-chunk and multi-chunk input, the indices returned by
        ``faiss_query`` must equal those of a default ``NearestNeighbors``.
        """
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        query = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        for chunk_size in (50, 20):
            X = mt.tensor(train, chunk_size=chunk_size)

            index_t = build_faiss_index(X, random_state=0, return_index_type='object')
            # only the indices are executed; the distance output is unused
            _, indices_t = faiss_query(index_t, query, 5, nprobe=10)
            indices = self.executor.execute_tensor(indices_t, concat=True)[0]

            knn = NearestNeighbors()
            knn.fit(train)
            expected_indices = knn.kneighbors(query, 5, return_distance=False)

            np.testing.assert_array_equal(indices, expected_indices)