Ejemplo n.º 1
0
class Test(unittest.TestCase):
    def setUp(self):
        """Create the executor shared by every test method in this class."""
        # NOTE(review): 'numpy' presumably selects the NumPy execution
        # backend of ExecutorForTest -- confirm against its definition.
        self.executor = ExecutorForTest('numpy')

    def testRechunkExecution(self):
        """Rechunk an 11x8 tensor from chunk size 3 to 4 and check each chunk.

        The executor is called without ``concat``, so the result is indexed
        chunk by chunk; the first six entries are compared with the matching
        slices of the raw array, rows varying slowest.
        """
        raw = np.random.random((11, 8))
        rechunked = tensor(raw, chunk_size=3).rechunk(4)

        chunks = self.executor.execute_tensor(rechunked)

        row_slices = (slice(None, 4), slice(4, 8), slice(8, None))
        col_slices = (slice(None, 4), slice(4, None))
        expected_blocks = [raw[rows, cols]
                           for rows in row_slices
                           for cols in col_slices]
        for idx, block in enumerate(expected_blocks):
            self.assertTrue(np.array_equal(chunks[idx], block))

    def testCopytoExecution(self):
        """Check ``copyto`` with a ``where`` mask and a Fortran-ordered target."""
        # masked copy: the mask is ``source > 1``, so the -1 entry is skipped
        # and the destination keeps its original 1 in that column
        dst = ones((2, 3), chunk_size=1)
        src = tensor([3, -1, 3], chunk_size=2)
        copyto(dst, src, where=src > 1)

        result = self.executor.execute_tensor(dst, concat=True)[0]
        np.testing.assert_equal(result, np.array([[3, 1, 3], [3, 1, 3]]))

        # copy into a tensor built from a Fortran-ordered array: the values
        # are overwritten with ones and the result must stay F-contiguous
        src = ones((2, 3), chunk_size=1)
        dst = tensor(np.asfortranarray(np.random.rand(2, 3)), chunk_size=2)
        copyto(dst, src)

        result = self.executor.execute_tensor(dst, concat=True)[0]
        np.testing.assert_array_equal(result,
                                      np.asfortranarray(np.ones((2, 3))))
        layout = result.flags
        self.assertTrue(layout['F_CONTIGUOUS'])
        self.assertFalse(layout['C_CONTIGUOUS'])

    def testAstypeExecution(self):
        """Cast dense, sparse and Fortran-ordered inputs to ``'i8'``.

        Each result is compared with the same cast applied directly to the
        raw NumPy / SciPy data.
        """
        # dense input
        dense = np.random.random((10, 5))
        casted = tensor(dense, chunk_size=3).astype('i8')

        out = self.executor.execute_tensor(casted, concat=True)
        np.testing.assert_array_equal(out[0], dense.astype('i8'))

        # sparse input, compared after densifying both sides
        sparse = sps.random(10, 5, density=.2)
        casted = tensor(sparse, chunk_size=3).astype('i8')

        out = self.executor.execute_tensor(casted, concat=True)
        got = out[0].toarray()
        want = sparse.astype('i8').toarray()
        self.assertTrue(np.array_equal(got, want))

        # Fortran-ordered input cast with order='C' must come back
        # C-contiguous only
        fortran = np.asfortranarray(np.random.random((10, 5)))
        casted = tensor(fortran, chunk_size=3).astype('i8', order='C')

        out = self.executor.execute_tensor(casted, concat=True)[0]
        np.testing.assert_array_equal(out, fortran.astype('i8'))
        self.assertTrue(out.flags['C_CONTIGUOUS'])
        self.assertFalse(out.flags['F_CONTIGUOUS'])

    def testTransposeExecution(self):
        """Compare ``transpose`` with NumPy for dense, sparse and F-ordered data."""
        dense = np.random.random((11, 8, 5))
        dense_t = tensor(dense, chunk_size=3)

        # no axes given: compared with ``raw.T``
        out = self.executor.execute_tensor(transpose(dense_t), concat=True)
        np.testing.assert_array_equal(out[0], dense.T)

        # negative axes (-2, -1, -3) are compared with the (1, 2, 0) permutation
        permuted = transpose(dense_t, axes=(-2, -1, -3))
        out = self.executor.execute_tensor(permuted, concat=True)
        np.testing.assert_array_equal(out[0], dense.transpose(1, 2, 0))

        # sparse input: the transposed tensor must still report as sparse
        sparse = sps.random(11, 8)
        sparse_t = transpose(tensor(sparse, chunk_size=3))
        self.assertTrue(sparse_t.issparse())

        out = self.executor.execute_tensor(sparse_t, concat=True)
        np.testing.assert_array_equal(out[0].toarray(), sparse.T.toarray())

        # test order: contiguity flags must match those of NumPy's own
        # transpose copied with order='A'
        fortran = np.asfortranarray(np.random.random((11, 8, 5)))
        for axes_args in ((), ((1, 2, 0),)):
            transposed = transpose(tensor(fortran, chunk_size=3), *axes_args)

            got = self.executor.execute_tensor(transposed, concat=True)[0]
            want = np.transpose(fortran, *axes_args).copy(order='A')

            np.testing.assert_array_equal(got, want)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], want.flags[flag])

    def testSwapaxesExecution(self):
        """Compare ``swapaxes`` with NumPy for dense, sparse and F-ordered data."""
        dense = np.random.random((11, 8, 5))
        swapped = tensor(dense, chunk_size=3).swapaxes(2, 0)

        out = self.executor.execute_tensor(swapped, concat=True)
        np.testing.assert_array_equal(out[0], dense.swapaxes(2, 0))

        # sparse input, compared against the densified raw matrix
        sparse = sps.random(11, 8, density=.2)
        swapped = tensor(sparse, chunk_size=3).swapaxes(1, 0)

        out = self.executor.execute_tensor(swapped, concat=True)
        np.testing.assert_array_equal(out[0].toarray(),
                                      sparse.toarray().swapaxes(1, 0))

        # test order: contiguity flags must match those of NumPy's own
        # swapaxes copied with order='A'
        fortran = np.asfortranarray(np.random.rand(11, 8, 5))
        for axis1, axis2 in ((2, 0), (0, 2), (1, 0)):
            swapped = tensor(fortran, chunk_size=3).swapaxes(axis1, axis2)

            got = self.executor.execute_tensor(swapped, concat=True)[0]
            want = fortran.swapaxes(axis1, axis2).copy(order='A')

            np.testing.assert_array_equal(got, want)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], want.flags[flag])

    def testMoveaxisExecution(self):
        """Check the result shape of ``moveaxis`` on a (3, 4, 5) tensor of zeros."""
        x = zeros((3, 4, 5), chunk_size=2)

        # (source, destination, expected shape of the executed result)
        cases = (
            (0, -1, (4, 5, 3)),
            (-1, 0, (5, 3, 4)),
            ([0, 1], [-1, -2], (5, 4, 3)),
            ([0, 1, 2], [-1, -2, -3], (5, 4, 3)),
        )
        for source, destination, expected_shape in cases:
            moved = moveaxis(x, source, destination)

            out = self.executor.execute_tensor(moved, concat=True)[0]
            self.assertEqual(out.shape, expected_shape)

    def testBroadcastToExecution(self):
        """Compare ``broadcast_to`` with NumPy, then check a permuted input."""
        target_shape = (5, 10, 5, 6)
        raw = np.random.random((10, 5, 1))
        broadcasted = broadcast_to(tensor(raw, chunk_size=2), target_shape)

        out = self.executor.execute_tensor(broadcasted, concat=True)[0]
        np.testing.assert_array_equal(out, np.broadcast_to(raw, target_shape))

        # test chunk with unknown shape: the input is the output of
        # ``mt.random.permutation``; only the final shape is checked
        shuffled = mt.random.permutation(mt.random.rand(3, 4, chunk_size=2))
        broadcasted = broadcast_to(shuffled, (2, 3, 4))

        out = self.executor.execute_tensor(broadcasted, concat=True)[0]
        self.assertEqual(out.shape, (2, 3, 4))

    def testBroadcastArraysExecutions(self):
        """Broadcast a (1, 3) and a (3, 1) tensor and compare with NumPy."""
        x_data, y_data = [[1, 2, 3]], [[1], [2], [3]]
        x = tensor(x_data, chunk_size=1)
        y = tensor(y_data, chunk_size=2)

        broadcasted = broadcast_arrays(x, y)

        # execute every output first, then compare pairwise with NumPy
        results = []
        for arr in broadcasted:
            results.append(self.executor.execute_tensor(arr, concat=True)[0])
        expected = np.broadcast_arrays(x_data, y_data)

        for got, want in zip(results, expected):
            np.testing.assert_equal(got, want)

    def testWhereExecution(self):
        """Compare three-argument ``where`` with ``np.where`` (dense and sparse)."""
        # dense: x is (4, 1) while the condition and y are (4, 4)
        raw_cond = np.random.randint(0, 2, size=(4, 4), dtype='?')
        raw_x = np.random.rand(4, 1)
        raw_y = np.random.rand(4, 4)
        cond, x, y = [tensor(raw_item, chunk_size=2)
                      for raw_item in (raw_cond, raw_x, raw_y)]

        out = self.executor.execute_tensor(where(cond, x, y), concat=True)
        expected = np.where(raw_cond, raw_x, raw_y)
        self.assertTrue(np.array_equal(out[0], expected))

        # sparse: same shapes, compared after densifying every operand
        raw_cond = sps.csr_matrix(
            np.random.randint(0, 2, size=(4, 4), dtype='?'))
        raw_x = sps.random(4, 1, density=.1)
        raw_y = sps.random(4, 4, density=.1)
        cond, x, y = [tensor(raw_item, chunk_size=2)
                      for raw_item in (raw_cond, raw_x, raw_y)]

        out = self.executor.execute_tensor(where(cond, x, y), concat=True)[0]
        expected = np.where(raw_cond.toarray(), raw_x.toarray(),
                            raw_y.toarray())
        self.assertTrue(np.array_equal(out.toarray(), expected))

    def testReshapeExecution(self):
        """Compare ``reshape`` / ``ravel`` with NumPy on two random 3-d inputs."""
        raw_data = np.random.rand(10, 20, 30)
        x = tensor(raw_data, chunk_size=6)

        for shape in ((-1, 30), (10, -1), (-1,)):
            out = self.executor.execute_tensor(x.reshape(*shape), concat=True)
            np.testing.assert_array_equal(out[0], raw_data.reshape(*shape))

        out = self.executor.execute_tensor(x.ravel(), concat=True)
        np.testing.assert_array_equal(out[0], raw_data.ravel())

        raw_data = np.random.rand(30, 100, 20)
        x = tensor(raw_data, chunk_size=6)

        for shape in ((-1, 20, 5, 5, 4), (3000, 10, 2), (60, 25, 40)):
            out = self.executor.execute_tensor(x.reshape(*shape), concat=True)
            np.testing.assert_array_equal(out[0], raw_data.reshape(*shape))

        # same target shape again, with '_reshape_with_shuffle' set on the op;
        # the first entries of the ``mock=True`` results must sum to the
        # byte size of the real result
        shuffled = x.reshape(60, 25, 40)
        shuffled.op.extra_params['_reshape_with_shuffle'] = True

        size_res = self.executor.execute_tensor(shuffled, mock=True)
        out = self.executor.execute_tensor(shuffled, concat=True)
        mocked_total = sum(v[0] for v in size_res)
        self.assertEqual(out[0].nbytes, mocked_total)
        self.assertTrue(np.array_equal(out[0], raw_data.reshape(60, 25, 40)))

        # ravel in Fortran order: values and contiguity flags must match NumPy
        flattened = x.ravel(order='F')

        got = self.executor.execute_tensor(flattened, concat=True)[0]
        want = raw_data.ravel(order='F')
        np.testing.assert_array_equal(got, want)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], want.flags[flag])

    def testExpandDimsExecution(self):
        """Compare ``expand_dims`` with NumPy and check out-of-range axes.

        Valid axes for the 3-d input (1, 0, 3, -1, -4) are compared with
        ``np.expand_dims``; axes -5 and 4 must raise ``AxisError``.
        """
        # ``np.AxisError`` was removed from the main namespace in NumPy 2.0;
        # ``np.exceptions`` exists since NumPy 1.25, so fall back to the
        # top-level name on older releases.
        axis_error = getattr(np, 'exceptions', np).AxisError

        raw_data = np.random.rand(10, 20, 30)
        x = tensor(raw_data, chunk_size=6)

        for axis in (1, 0, 3, -1, -4):
            y = expand_dims(x, axis)

            res = self.executor.execute_tensor(y, concat=True)
            self.assertTrue(
                np.array_equal(res[0], np.expand_dims(raw_data, axis)))

        # one step outside the valid range on either side
        for bad_axis in (-5, 4):
            with self.assertRaises(axis_error):
                expand_dims(x, bad_axis)

    def testRollAxisExecution(self):
        """Roll axis 3 of a (3, 4, 5, 6) tensor of ones to position 1."""
        shape = (3, 4, 5, 6)
        rolled = rollaxis(ones(shape, chunk_size=1), 3, 1)

        out = self.executor.execute_tensor(rolled, concat=True)
        expected = np.rollaxis(np.ones(shape), 3, 1)
        self.assertTrue(np.array_equal(out[0], expected))

    def testAtleast1dExecution(self):
        """Run ``atleast_1d`` on a scalar, a 1-d and a 2-d input."""
        inputs = (1, ones(3, chunk_size=2), ones((3, 4), chunk_size=2))

        outputs = atleast_1d(*inputs)

        results = [self.executor.execute_tensor(out, concat=True)[0]
                   for out in outputs]

        expected = (np.array([1]), np.ones(3), np.ones((3, 4)))
        for idx, want in enumerate(expected):
            self.assertTrue(np.array_equal(results[idx], want))

    def testAtleast2dExecution(self):
        """Run ``atleast_2d`` on a scalar, a 1-d and a 2-d input."""
        inputs = (1, ones(3, chunk_size=2), ones((3, 4), chunk_size=2))

        outputs = atleast_2d(*inputs)

        results = [self.executor.execute_tensor(out, concat=True)[0]
                   for out in outputs]

        expected = (
            np.array([[1]]),
            np.atleast_2d(np.ones(3)),
            np.ones((3, 4)),
        )
        for idx, want in enumerate(expected):
            self.assertTrue(np.array_equal(results[idx], want))

    def testAtleast3dExecution(self):
        """Run ``atleast_3d`` on a scalar, a 1-d and a 2-d input."""
        scalar = 1
        inputs = (scalar, ones(3, chunk_size=2), ones((3, 4), chunk_size=2))

        outputs = atleast_3d(*inputs)

        results = [self.executor.execute_tensor(out, concat=True)[0]
                   for out in outputs]

        expected = (
            np.atleast_3d(scalar),
            np.atleast_3d(np.ones(3)),
            np.atleast_3d(np.ones((3, 4))),
        )
        for idx, want in enumerate(expected):
            self.assertTrue(np.array_equal(results[idx], want))

    def testArgwhereExecution(self):
        """Compare ``argwhere`` with NumPy, including the output memory order."""
        found = argwhere(arange(6, chunk_size=2).reshape(2, 3) > 1)

        out = self.executor.execute_tensor(found, concat=True)[0]
        np.testing.assert_array_equal(
            out, np.argwhere(np.arange(6).reshape(2, 3) > 1))

        # Fortran-ordered input: the index array must be F-contiguous only
        data = np.asfortranarray(np.random.rand(10, 20))
        found = argwhere(tensor(data, chunk_size=10) > 0.5)

        out = self.executor.execute_tensor(found, concat=True)[0]
        np.testing.assert_array_equal(out, np.argwhere(data > 0.5))
        layout = out.flags
        self.assertTrue(layout['F_CONTIGUOUS'])
        self.assertFalse(layout['C_CONTIGUOUS'])

    def testArraySplitExecution(self):
        """Compare ``array_split`` along axis 2 with ``np.array_split``.

        Both an integer section count and an explicit list of split indices
        are exercised on the same (2, 3, 8) input.
        """
        x = arange(48, chunk_size=3).reshape(2, 3, 8)
        raw = np.arange(48).reshape(2, 3, 8)

        for indices_or_sections in (3, [3, 5, 6, 10]):
            ss = array_split(x, indices_or_sections, axis=2)

            res = [self.executor.execute_tensor(i, concat=True)[0]
                   for i in ss]
            expected = np.array_split(raw, indices_or_sections, axis=2)
            self.assertEqual(len(res), len(expected))
            # plain loop: the assertions are run for their side effect only
            for r, e in zip(res, expected):
                np.testing.assert_equal(r, e)

    def testSplitExecution(self):
        """Compare ``split`` / ``hsplit`` / ``vsplit`` / ``dsplit`` with NumPy.

        Dense inputs are checked for every variant; a sparse input is checked
        for ``split`` along axis 0 after densifying each piece.
        """

        def check(pieces, expected, densify=False):
            # Execute every piece, then compare count and content with NumPy.
            res = [self.executor.execute_tensor(i, concat=True)[0]
                   for i in pieces]
            self.assertEqual(len(res), len(expected))
            for r, e in zip(res, expected):
                np.testing.assert_equal(r.toarray() if densify else r, e)

        x = arange(48, chunk_size=3).reshape(2, 3, 8)
        raw = np.arange(48).reshape(2, 3, 8)
        check(split(x, 4, axis=2), np.split(raw, 4, axis=2))
        check(split(x, [3, 5, 6, 10], axis=2),
              np.split(raw, [3, 5, 6, 10], axis=2))

        # hsplit
        x = arange(120, chunk_size=3).reshape(2, 12, 5)
        check(hsplit(x, 4), np.hsplit(np.arange(120).reshape(2, 12, 5), 4))

        # vsplit
        x = arange(48, chunk_size=3).reshape(8, 3, 2)
        check(vsplit(x, 4), np.vsplit(np.arange(48).reshape(8, 3, 2), 4))

        # dsplit
        x = arange(48, chunk_size=3).reshape(2, 3, 8)
        check(dsplit(x, 4), np.dsplit(np.arange(48).reshape(2, 3, 8), 4))

        # sparse input: pieces are densified before the comparison
        x_data = sps.random(12, 8, density=.1)
        x = tensor(x_data, chunk_size=3)
        check(split(x, 4, axis=0),
              np.split(x_data.toarray(), 4, axis=0),
              densify=True)

    def testRollExecution(self):
        """Compare ``roll`` with ``np.roll`` on 1-d and 2-d inputs."""
        x = arange(10, chunk_size=2)

        out = self.executor.execute_tensor(roll(x, 2), concat=True)[0]
        np.testing.assert_equal(out, np.roll(np.arange(10), 2))

        # 2-d view of the same data: no axis, then each axis in turn
        x2 = x.reshape(2, 5)
        raw2 = np.arange(10).reshape(2, 5)
        for kwargs in ({}, {'axis': 0}, {'axis': 1}):
            rolled = roll(x2, 1, **kwargs)

            out = self.executor.execute_tensor(rolled, concat=True)[0]
            np.testing.assert_equal(out, np.roll(raw2, 1, **kwargs))

    def testSqueezeExecution(self):
        """Compare ``squeeze`` with NumPy, with and without an explicit axis."""
        data = np.array([[[0], [1], [2]]])
        x = tensor(data, chunk_size=1)

        for kwargs in ({}, {'axis': 2}):
            squeezed = squeeze(x, **kwargs)

            out = self.executor.execute_tensor(squeezed, concat=True)[0]
            np.testing.assert_equal(out, np.squeeze(data, **kwargs))

    def testDiffExecution(self):
        """Compare ``diff`` with ``np.diff`` for 1-d, 2-d and datetime inputs."""
        # (raw data, keyword variants passed to both diff implementations)
        numeric_cases = (
            (np.array([1, 2, 4, 7, 0]), ({}, {'n': 2})),
            (np.array([[1, 3, 6, 10], [0, 5, 6, 8]]), ({}, {'axis': 0})),
        )
        for data, kwargs_variants in numeric_cases:
            x = tensor(data, chunk_size=2)
            for kwargs in kwargs_variants:
                differenced = diff(x, **kwargs)

                out = self.executor.execute_tensor(differenced,
                                                   concat=True)[0]
                np.testing.assert_equal(out, np.diff(data, **kwargs))

        # datetime64 range
        bounds = ('1066-10-13', '1066-10-16')
        differenced = diff(mt.arange(*bounds, dtype=mt.datetime64))

        out = self.executor.execute_tensor(differenced, concat=True)[0]
        expected = np.diff(np.arange(*bounds, dtype=np.datetime64))
        np.testing.assert_equal(out, expected)

    def testEdiff1d(self):
        """Compare ``ediff1d`` with NumPy, including ``to_begin`` / ``to_end``."""
        data = np.array([1, 2, 4, 7, 0])
        x = tensor(data, chunk_size=2)

        out = self.executor.execute_tensor(ediff1d(x), concat=True)[0]
        np.testing.assert_equal(out, np.ediff1d(data))

        # prepend / append values supplied as tensors
        padded = ediff1d(x,
                         to_begin=tensor(-99, chunk_size=2),
                         to_end=tensor([88, 99], chunk_size=2))

        out = self.executor.execute_tensor(padded, concat=True)[0]
        np.testing.assert_equal(
            out, np.ediff1d(data, to_begin=-99, to_end=np.array([88, 99])))

        # 2-d input given as a nested list
        data = [[1, 2, 4], [1, 6, 24]]
        flat_diff = ediff1d(tensor(data, chunk_size=2))

        out = self.executor.execute_tensor(flat_diff, concat=True)[0]
        np.testing.assert_equal(out, np.ediff1d(data))

    def testFlipExecution(self):
        """Compare ``flip`` / ``flipud`` / ``fliplr`` with NumPy on a 2x2x2 input."""
        a = arange(8, chunk_size=2).reshape((2, 2, 2))

        # (function under test, NumPy reference, extra positional arguments)
        cases = (
            (flip, np.flip, (0,)),
            (flip, np.flip, (1,)),
            (flipud, np.flipud, ()),
            (fliplr, np.fliplr, ()),
        )
        for flip_func, np_func, extra in cases:
            flipped = flip_func(a, *extra)

            out = self.executor.execute_tensor(flipped, concat=True)[0]
            reference = np_func(np.arange(8).reshape(2, 2, 2), *extra)
            np.testing.assert_equal(out, reference)

    def testRepeatExecution(self):
        """Compare ``repeat`` with ``np.repeat`` for scalar, dense and sparse data."""
        # scalar input; this call is executed without ``concat``
        out = self.executor.execute_tensor(repeat(3, 4))[0]
        np.testing.assert_equal(out, np.repeat(3, 4))

        x_data = np.random.randn(20, 30)
        x = tensor(x_data, chunk_size=(3, 4))

        # (repeats given to the tested function, repeats given to NumPy,
        #  shared keyword arguments)
        dense_cases = (
            (2, 2, {}),
            (3, 3, {'axis': 1}),
            (np.arange(20), np.arange(20), {'axis': 0}),
            (arange(20, chunk_size=5), np.arange(20), {'axis': 0}),
        )
        for repeats, np_repeats, kwargs in dense_cases:
            repeated = repeat(x, repeats, **kwargs)

            out = self.executor.execute_tensor(repeated, concat=True)[0]
            np.testing.assert_equal(out,
                                    np.repeat(x_data, np_repeats, **kwargs))

        # sparse input, compared after densifying
        x_data = sps.random(20, 30, density=.1)
        repeated = repeat(tensor(x_data, chunk_size=(3, 4)), 2, axis=1)

        out = self.executor.execute_tensor(repeated, concat=True)[0]
        np.testing.assert_equal(out.toarray(),
                                np.repeat(x_data.toarray(), 2, axis=1))

    def testTileExecution(self):
        """Compare ``tile`` with ``np.tile`` for several inputs and ``reps``."""
        # (raw data, chunk size of the tensor, reps values to try)
        cases = (
            (np.array([0, 1, 2]), 2, (2, (2, 2), (2, 1, 2))),
            (np.array([[1, 2], [3, 4]]), 1, (2, (2, 1))),
            (np.array([1, 2, 3, 4]), 3, ((4, 1),)),
        )
        for data, chunk_size, reps_variants in cases:
            source = tensor(data, chunk_size=chunk_size)
            for reps in reps_variants:
                tiled = tile(source, reps)

                out = self.executor.execute_tensor(tiled, concat=True)[0]
                np.testing.assert_equal(out, np.tile(data, reps))

    def testIsInExecution(self):
        """Compare ``isin`` with ``np.isin``, plain, inverted and with a set."""
        element = 2 * arange(4, chunk_size=1).reshape((2, 2))
        raw_element = 2 * np.arange(4).reshape((2, 2))
        test_elements = [1, 2, 4, 8]

        # plain membership, then boolean indexing with the resulting mask
        mask = isin(element, test_elements)

        out = self.executor.execute_tensor(mask, concat=True)[0]
        np.testing.assert_equal(out, np.isin(raw_element, test_elements))

        out = self.executor.execute_tensor(element[mask], concat=True)[0]
        np.testing.assert_equal(out, np.array([2, 4]))

        # inverted membership
        mask = isin(element, test_elements, invert=True)

        out = self.executor.execute_tensor(mask, concat=True)[0]
        np.testing.assert_equal(
            out, np.isin(raw_element, test_elements, invert=True))

        out = self.executor.execute_tensor(element[mask], concat=True)[0]
        np.testing.assert_equal(out, np.array([0, 6]))

        # test elements given as a Python set, passed to both implementations
        test_set = {1, 2, 4, 8}
        mask = isin(element, test_set)

        out = self.executor.execute_tensor(mask, concat=True)[0]
        np.testing.assert_equal(out, np.isin(raw_element, test_set))

    def testRavelExecution(self):
        """Flatten a (10, 5) tensor of ones with ``mt.ravel``."""
        flattened = mt.ravel(ones((10, 5), chunk_size=2))

        out = self.executor.execute_tensor(flattened, concat=True)[0]
        self.assertEqual(len(out), 50)
        np.testing.assert_equal(out, np.ones(50))

    def testSearchsortedExecution(self):
        """Compare ``searchsorted`` with ``np.searchsorted``.

        Covers scalar and tensor search values, both ``side`` settings,
        several chunk sizes of the sorted array, and the ``sorter`` argument.
        """

        def run(t):
            # Execute a tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        raw = np.sort(np.random.randint(100, size=(16, )))

        # (search value, keyword arguments) for the scalar cases
        scalar_cases = (
            # value in the middle, default side
            (20, {}),
            # value larger than 100
            (200, {}),
            # value exact in the middle of the array
            (raw[10], {'side': 'left'}),
            (raw[10], {'side': 'right'}),
            # value exact in the end of the array
            (raw[15], {'side': 'left'}),
            (raw[15], {'side': 'right'}),
            # value exact in the start of the array
            (raw[0], {'side': 'left'}),
            (raw[0], {'side': 'right'}),
        )

        # test different chunk_size, 3 will have combine, 6 will skip combine
        for chunk_size in (3, 6):
            arr = tensor(raw, chunk_size=chunk_size)

            for value, kwargs in scalar_cases:
                found = run(searchsorted(arr, value, **kwargs))
                np.testing.assert_array_equal(
                    found, np.searchsorted(raw, value, **kwargs))

            raw2 = np.random.randint(100, size=(3, 4))

            # test tensor as the search values, side left then right
            for side in ('left', 'right'):
                values = tensor(raw2, chunk_size=2)
                found = run(searchsorted(arr, values, side=side))
                np.testing.assert_array_equal(
                    found, np.searchsorted(raw, raw2, side=side))

        # test one chunk
        arr = tensor(raw, chunk_size=16)

        # test scalar, tensor to search has 1 chunk
        found = run(searchsorted(arr, 20))
        np.testing.assert_array_equal(found, np.searchsorted(raw, 20))

        # search values chunked with chunk_size 4, then 2; ``raw2`` is the
        # array left over from the last loop iteration above
        for values_chunk_size in (4, 2):
            values = tensor(raw2, chunk_size=values_chunk_size)
            found = run(searchsorted(arr, values))
            np.testing.assert_array_equal(found, np.searchsorted(raw, raw2))

        # test sorter: unsorted data plus its argsort, chunked differently
        raw3 = np.random.randint(100, size=(16, ))
        arr = tensor(raw3, chunk_size=3)
        order = np.argsort(raw3)
        order_arr = tensor(order, chunk_size=4)

        found = run(searchsorted(arr, 20, sorter=order_arr))
        np.testing.assert_array_equal(
            found, np.searchsorted(raw3, 20, sorter=order))

    def testUniqueExecution(self):
        """Compare ``unique`` with ``np.unique``.

        Covers 1-d and 3-d inputs at two chunk sizes, several combinations of
        the optional outputs, ``axis=`` handling, and a call that passes
        ``aggregate_size`` explicitly.
        """
        def run(t):
            # Execute a single tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        rs = np.random.RandomState(0)
        raw = rs.randint(10, size=(10, ))

        # Optional-output combinations, listed in the order they are checked.
        flag_sets = (
            dict(return_index=True),
            dict(return_inverse=True),
            dict(return_counts=True),
            dict(return_index=True, return_inverse=True, return_counts=True),
            dict(return_index=True, return_counts=True),
        )

        for chunk_size in (10, 3):
            x = tensor(raw, chunk_size=chunk_size)

            np.testing.assert_array_equal(run(unique(x)), np.unique(raw))

            for flags in flag_sets:
                outputs = list(unique(x, **flags))
                res = self.executor.execute_tensors(outputs)
                expected = np.unique(raw, **flags)

                # unique values plus one array per requested flag
                n_outputs = len(flags) + 1
                self.assertEqual(len(res), n_outputs)
                self.assertEqual(len(expected), n_outputs)
                for actual, desired in zip(res, expected):
                    np.testing.assert_array_equal(actual, desired)

            raw2 = rs.randint(10, size=(4, 5, 6))
            x2 = tensor(raw2, chunk_size=chunk_size)

            np.testing.assert_array_equal(run(unique(x2)), np.unique(raw2))
            for axis in (1, 2):
                np.testing.assert_array_equal(run(unique(x2, axis=axis)),
                                              np.unique(raw2, axis=axis))

        # 2-d input with two identical columns, unique along axis 1 with an
        # explicit aggregate_size.
        raw = rs.randint(10, size=(10, 20))
        raw[:, 0] = raw[:, 11] = rs.randint(10, size=(10, ))
        x = tensor(raw, chunk_size=2)
        outputs = unique(x,
                         aggregate_size=3,
                         axis=1,
                         return_index=True,
                         return_inverse=True,
                         return_counts=True)
        y, ind, inv, counts = outputs

        got_unique, got_ind, got_inv, got_counts = \
            self.executor.execute_tensors((y, ind, inv, counts))
        exp_unique, exp_ind, exp_counts = np.unique(raw,
                                                    axis=1,
                                                    return_index=True,
                                                    return_counts=True)

        # The columns are reordered (sorted by every row in turn) before being
        # compared with NumPy's output; the inverse is checked against the
        # unsorted result instead.
        sort_keys = list(range(got_unique.shape[0]))
        column_order = np.asarray(
            pd.DataFrame(got_unique).sort_values(sort_keys, axis=1).columns)

        np.testing.assert_array_equal(got_unique[:, column_order], exp_unique)
        np.testing.assert_array_equal(got_ind[column_order], exp_ind)
        np.testing.assert_array_equal(got_unique[:, got_inv], raw)
        np.testing.assert_array_equal(got_counts[column_order], exp_counts)

        # 0/1 valued tensor spread over many chunks
        x = (mt.random.RandomState(0).rand(1000, chunk_size=20) > 0.5).astype(
            np.int32)
        res = np.sort(run(unique(x)))
        np.testing.assert_array_equal(res, np.array([0, 1]))

    @require_cupy
    def testToGPUExecution(self):
        """Pass a chunked tensor through ``to_gpu`` and compare the result,
        read back with ``.get()``, against the source array.
        """
        source = np.random.rand(10, 10)
        on_gpu = to_gpu(tensor(source, chunk_size=3))

        fetched = self.executor.execute_tensor(on_gpu, concat=True)[0]
        np.testing.assert_array_equal(fetched.get(), source)

    @require_cupy
    def testToCPUExecution(self):
        """Pass a tensor created with ``gpu=True`` through ``to_cpu`` and
        compare the result against the source array.
        """
        source = np.random.rand(10, 10)
        on_cpu = to_cpu(tensor(source, chunk_size=3, gpu=True))

        fetched = self.executor.execute_tensor(on_cpu, concat=True)[0]
        np.testing.assert_array_equal(fetched, source)

    def testSortExecution(self):
        """Compare ``sort`` with ``np.sort``.

        Covers several chunk layouts, sort axes, ``psrs_kinds`` settings and
        structured dtypes, plus the ``sort`` method called on a tensor.
        """
        def run(t):
            # Execute a single tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        record_dtype = [('id', np.int32), ('size', np.int64)]

        # only 1 chunk when axis = -1
        raw = np.random.rand(100, 10)
        x = tensor(raw, chunk_size=10)
        np.testing.assert_array_equal(run(sort(x)), np.sort(raw))

        # 1-d chunk
        raw = np.random.rand(100)
        x = tensor(raw, chunk_size=10)
        np.testing.assert_array_equal(run(sort(x)), np.sort(raw))

        # test force need_align=True
        aligned = sort(x)
        aligned.op._need_align = True
        res = run(aligned)
        self.assertEqual(get_tiled(aligned).nsplits, get_tiled(x).nsplits)
        np.testing.assert_array_equal(res, np.sort(raw))

        # test psrs_kinds
        np.testing.assert_array_equal(
            run(sort(x, psrs_kinds=[None, None, 'quicksort'])), np.sort(raw))

        # structured dtype
        raw = np.empty(100, dtype=record_dtype)
        raw['id'] = np.random.randint(1000, size=100, dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=100, dtype=np.int64)
        x = tensor(raw, chunk_size=10)
        np.testing.assert_array_equal(
            run(sort(x, order=['size', 'id'])),
            np.sort(raw, order=['size', 'id']))

        # test psrs_kinds with structured dtype
        np.testing.assert_array_equal(
            run(sort(x,
                     order=['size', 'id'],
                     psrs_kinds=[None, None, 'quicksort'])),
            np.sort(raw, order=['size', 'id']))

        # test flatten case
        raw = np.random.rand(10, 10)
        x = tensor(raw, chunk_size=5)
        np.testing.assert_array_equal(run(sort(x, axis=None)),
                                      np.sort(raw, axis=None))

        # test multi-dimension
        raw = np.random.rand(10, 100)
        x = tensor(raw, chunk_size=(2, 10))
        for kinds in (['quicksort'] * 3, [None, None, 'quicksort']):
            np.testing.assert_array_equal(run(sort(x, psrs_kinds=kinds)),
                                          np.sort(raw))

        raw = np.random.rand(10, 99)
        x = tensor(raw, chunk_size=(2, 10))
        np.testing.assert_array_equal(run(sort(x)), np.sort(raw))

        # test 3-d: default axis, axis=0 and axis=1, each with and without
        # an explicit psrs_kinds
        raw = np.random.rand(20, 25, 28)
        x = tensor(raw, chunk_size=(10, 5, 7))
        for axis_kw in ({}, {'axis': 0}, {'axis': 1}):
            for kinds_kw in ({}, {'psrs_kinds': [None, None, 'quicksort']}):
                np.testing.assert_array_equal(
                    run(sort(x, **axis_kw, **kinds_kw)),
                    np.sort(raw, **axis_kw))

        # test multi-dimension with structured type
        raw = np.empty((10, 100), dtype=record_dtype)
        raw['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64)
        x = tensor(raw, chunk_size=(3, 10))

        np.testing.assert_array_equal(run(sort(x)), np.sort(raw))

        np.testing.assert_array_equal(
            run(sort(x, order=['size', 'id'])),
            np.sort(raw, order=['size', 'id']))

        np.testing.assert_array_equal(
            run(sort(x, order=['size'])),
            np.sort(raw, order=['size']))

        np.testing.assert_array_equal(
            run(sort(x, axis=0, order=['size', 'id'])),
            np.sort(raw, axis=0, order=['size', 'id']))

        np.testing.assert_array_equal(
            run(sort(x,
                     axis=0,
                     order=['size', 'id'],
                     psrs_kinds=[None, None, 'quicksort'])),
            np.sort(raw, axis=0, order=['size', 'id']))

        # sort method called on the tensor itself, twice in a row; the second
        # expectation is built on top of the first sort
        raw = np.random.rand(10, 12)
        a = tensor(raw, chunk_size=(5, 4))
        a.sort(axis=1)
        np.testing.assert_array_equal(run(a), np.sort(raw, axis=1))

        a.sort(axis=0)
        np.testing.assert_array_equal(
            run(a), np.sort(np.sort(raw, axis=1), axis=0))

    def testPartitionExecution(self):
        """Compare ``partition`` with ``np.partition`` at the ``kth`` positions.

        Covers several chunk layouts, axes and structured dtypes, the
        ``partition`` method called on a tensor, and ``kth`` given as a tensor
        (checked against ``sort`` instead of NumPy).
        """
        def run(t):
            # Execute a single tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def random_kth(low, high, count):
            # Fixed-seed kth positions, regenerated from seed 0 on every call.
            return np.random.RandomState(0).randint(low, high, size=(count, ))

        record_dtype = [('id', np.int32), ('size', np.int64)]

        # only 1 chunk when axis = -1
        raw = np.random.rand(100, 10)
        x = tensor(raw, chunk_size=10)
        np.testing.assert_array_equal(run(partition(x, [1, 8])),
                                      np.partition(raw, [1, 8]))

        # 1-d chunk
        raw = np.random.rand(100)
        x = tensor(raw, chunk_size=10)

        kth = random_kth(-100, 100, 10)
        res = run(partition(x, kth))
        np.testing.assert_array_equal(res[kth], np.partition(raw, kth)[kth])

        # structured dtype
        raw = np.empty(100, dtype=record_dtype)
        raw['id'] = np.random.randint(1000, size=100, dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=100, dtype=np.int64)
        x = tensor(raw, chunk_size=10)

        res = run(partition(x, kth, order=['size', 'id']))
        np.testing.assert_array_equal(
            res[kth],
            np.partition(raw, kth, order=['size', 'id'])[kth])

        # test flatten case
        raw = np.random.rand(10, 10)
        x = tensor(raw, chunk_size=5)

        res = run(partition(x, kth, axis=None))
        np.testing.assert_array_equal(res[kth],
                                      np.partition(raw, kth, axis=None)[kth])

        # test multi-dimension
        raw = np.random.rand(10, 100)
        x = tensor(raw, chunk_size=(2, 10))

        kth = random_kth(-10, 10, 3)
        res = run(partition(x, kth))
        np.testing.assert_array_equal(res[:, kth],
                                      np.partition(raw, kth)[:, kth])

        raw = np.random.rand(10, 99)
        x = tensor(raw, chunk_size=(2, 10))

        res = run(partition(x, kth))
        np.testing.assert_array_equal(res[:, kth],
                                      np.partition(raw, kth)[:, kth])

        # test 3-d
        raw = np.random.rand(20, 25, 28)
        x = tensor(raw, chunk_size=(10, 5, 7))

        kth = random_kth(-28, 28, 3)
        res = run(partition(x, kth))
        np.testing.assert_array_equal(res[:, :, kth],
                                      np.partition(raw, kth)[:, :, kth])

        kth = random_kth(-20, 20, 3)
        res = run(partition(x, kth, axis=0))
        np.testing.assert_array_equal(res[kth],
                                      np.partition(raw, kth, axis=0)[kth])

        kth = random_kth(-25, 25, 3)
        res = run(partition(x, kth, axis=1))
        np.testing.assert_array_equal(res[:, kth],
                                      np.partition(raw, kth, axis=1)[:, kth])

        # test multi-dimension with structured type
        raw = np.empty((10, 100), dtype=record_dtype)
        raw['id'] = np.random.randint(1000, size=(10, 100), dtype=np.int32)
        raw['size'] = np.random.randint(1000, size=(10, 100), dtype=np.int64)
        x = tensor(raw, chunk_size=(3, 10))

        kth = random_kth(-100, 100, 10)
        res = run(partition(x, kth))
        np.testing.assert_array_equal(res[:, kth],
                                      np.partition(raw, kth)[:, kth])

        res = run(partition(x, kth, order=['size', 'id']))
        np.testing.assert_array_equal(
            res[:, kth],
            np.partition(raw, kth, order=['size', 'id'])[:, kth])

        res = run(partition(x, kth, order=['size']))
        np.testing.assert_array_equal(
            res[:, kth],
            np.partition(raw, kth, order=['size'])[:, kth])

        kth = random_kth(-10, 10, 5)
        res = run(partition(x, kth, axis=0, order=['size', 'id']))
        np.testing.assert_array_equal(
            res[kth],
            np.partition(raw, kth, axis=0, order=['size', 'id'])[kth])

        # partition method called on the tensor itself, twice in a row
        raw = np.random.rand(10, 12)
        a = tensor(raw, chunk_size=(5, 4))
        kth = random_kth(-12, 12, 2)
        a.partition(kth, axis=1)

        first_pass = run(a)
        np.testing.assert_array_equal(first_pass[:, kth],
                                      np.partition(raw, kth, axis=1)[:, kth])

        kth = random_kth(-10, 10, 2)
        a.partition(kth, axis=0)

        # the second expectation starts from the first executed result
        second_pass = run(a)
        np.testing.assert_array_equal(
            second_pass[kth],
            np.partition(first_pass, kth, axis=0)[kth])

        # test kth which is tensor, for a multi-chunk and a single-chunk input
        raw = np.random.rand(10, 12)
        for chunks in ((3, 5), (10, 12)):
            a = tensor(raw, chunk_size=chunks)
            kth = (mt.random.rand(5) * 24 - 12).astype(int)

            px = partition(a, kth)
            sx = sort(a)

            res = run(px)
            kth_res = run(kth)
            sort_res = run(sx)
            np.testing.assert_array_equal(res[:, kth_res],
                                          sort_res[:, kth_res])
Ejemplo n.º 2
0
class Test(unittest.TestCase):
    def setUp(self):
        """Create the executor that every test in this class runs against."""
        backend = 'numpy'
        self.executor = ExecutorForTest(backend)

    def _nan_equal(self, a, b):
        try:
            np.testing.assert_equal(a, b)
        except AssertionError:
            return False
        return True

    def testBaseExecution(self):
        """Add 1 to a tensor and check the first element of the result list
        for an all-ones tensor and for random 3-d data.
        """
        incremented = ones((10, 8), chunk_size=2) + 1

        chunks = self.executor.execute_tensor(incremented)

        self.assertTrue((chunks[0] == np.ones((2, 2)) + 1).all())

        data = np.random.random((10, 8, 3))
        incremented = tensor(data, chunk_size=2) + 1

        chunks = self.executor.execute_tensor(incremented)

        # the first result is compared with the leading 2x2x2 corner
        self.assertTrue((chunks[0] == data[:2, :2, :2] + 1).all())

    def testBaseOrderExecution(self):
        """Check the memory layout of an addition on Fortran-ordered input,
        by default and with ``order='C'`` requested.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        raw = np.asfortranarray(np.random.rand(5, 6))
        arr = tensor(raw, chunk_size=3)

        # default: the result is expected to stay F-contiguous
        default_res = run(arr + 1)
        np.testing.assert_array_equal(default_res, raw + 1)
        self.assertFalse(default_res.flags['C_CONTIGUOUS'])
        self.assertTrue(default_res.flags['F_CONTIGUOUS'])

        # explicit order='C': the result is expected to be C-contiguous
        c_res = run(add(arr, 1, order='C'))
        np.testing.assert_array_equal(c_res, np.add(raw, 1, order='C'))
        self.assertTrue(c_res.flags['C_CONTIGUOUS'])
        self.assertFalse(c_res.flags['F_CONTIGUOUS'])

    @staticmethod
    def _get_func(op):
        if isinstance(op, str):
            return getattr(np, op)
        return op

    def testUfuncExecution(self):
        """Run the unary and binary ufuncs on dense tensors and compare each
        result with the NumPy function named by the op's ``_func_name``.

        ``arccosh``, ``invert`` and the modulo/bitwise/shift/``ldexp`` group
        are run on integer data; all remaining ufuncs run on random floats.
        """
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        special_unary = {arccosh, invert}
        special_binary = {mod, fmod, bitand, bitor, bitxor, lshift, rshift,
                          ldexp}

        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        def check_unary(funcs, arr, data):
            for func in funcs:
                out = func(arr)
                res = run(out)
                expected = self._get_func(out.op._func_name)(data)
                self.assertTrue(np.allclose(res[0], expected))

        def check_binary(funcs, lhs, rhs, lhs_data, rhs_data, scalar):
            # Each ufunc is tried as tensor-tensor, tensor-scalar and
            # scalar-tensor.
            for func in funcs:
                outs = (func(lhs, rhs), func(lhs, scalar), func(scalar, lhs))
                results = [run(out) for out in outs]

                # all three expectations use the first output's function name
                name = outs[0].op._func_name
                expectations = (
                    self._get_func(name)(lhs_data, rhs_data),
                    self._get_func(name)(lhs_data, scalar),
                    self._get_func(name)(scalar, lhs_data),
                )
                for res, expected in zip(results, expectations):
                    self.assertTrue(np.allclose(res[0], expected))

        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((5, 9, 4))
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)

        check_unary(UNARY_UFUNC - special_unary, arr1, data1)
        check_binary(BIN_UFUNC - special_binary,
                     arr1, arr2, data1, data2, rand)

        data1 = np.random.randint(2, 10, size=(10, 10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=6)
        arr2 = tensor(data2, chunk_size=6)

        check_unary(special_unary, arr1, data1)
        check_binary(special_binary, arr1, arr2, data1, data2, rand)

    @staticmethod
    def _get_sparse_func(op):
        """Wrap *op* so that sparse arguments are densified before the call.

        A string *op* is first looked up as an attribute of ``numpy``. The
        returned function converts every argument for which ``issparse`` is
        true with ``.toarray()`` and passes the others through unchanged.
        """
        from mars.lib.sparse.core import issparse

        target = getattr(np, op) if isinstance(op, str) else op

        def func(*args):
            dense_args = [arg.toarray() if issparse(arg) else arg
                          for arg in args]
            return target(*dense_args)

        return func

    @staticmethod
    def toarray(x):
        if hasattr(x, 'toarray'):
            return x.toarray()
        return x

    @ignore_warning
    def testSparseUfuncExexution(self):
        """Run the unary and binary ufuncs on sparse tensors and compare each
        result with NumPy applied to the densified operands.

        The outcome of every ``_nan_equal`` comparison is asserted.
        ``_nan_equal`` only returns a boolean, so calling it without checking
        the return value would let any mismatch pass unnoticed.
        """
        from mars.tensor.arithmetic import UNARY_UFUNC, BIN_UFUNC, arccosh, \
            invert, mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp

        _sp_unary_ufunc = {arccosh, invert}
        _sp_bin_ufunc = {mod, fmod, bitand, bitor, bitxor, lshift, rshift, ldexp}

        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        def check(res, expected):
            # Densify the executed result if needed and fail on a mismatch.
            self.assertTrue(self._nan_equal(self.toarray(res[0]), expected))

        def check_unary(funcs, arr, data):
            for func in funcs:
                res_tensor = func(arr)
                res = run(res_tensor)
                expected = self._get_sparse_func(res_tensor.op._func_name)(data)
                check(res, expected)

        def check_binary(funcs, lhs, rhs, lhs_data, rhs_data, scalar):
            # Each ufunc is tried as tensor-tensor, tensor-scalar and
            # scalar-tensor.
            for func in funcs:
                res_tensor1 = func(lhs, rhs)
                res_tensor2 = func(lhs, scalar)
                res_tensor3 = func(scalar, lhs)

                res1 = run(res_tensor1)
                res2 = run(res_tensor2)
                res3 = run(res_tensor3)

                # all three expectations use the first output's function name
                reference = self._get_sparse_func(res_tensor1.op._func_name)
                expected1 = reference(lhs_data, rhs_data)
                expected2 = reference(lhs_data, scalar)
                expected3 = reference(scalar, lhs_data)

                check(res1, expected1)
                check(res2, expected2)
                check(res3, expected3)

        data1 = sps.random(5, 9, density=.1)
        data2 = sps.random(5, 9, density=.2)
        rand = np.random.random()
        arr1 = tensor(data1, chunk_size=3)
        arr2 = tensor(data2, chunk_size=3)

        check_unary(UNARY_UFUNC - _sp_unary_ufunc, arr1, data1)
        check_binary(BIN_UFUNC - _sp_bin_ufunc,
                     arr1, arr2, data1, data2, rand)

        # integer data, converted to sparse tensors with tosparse()
        data1 = np.random.randint(2, 10, size=(10, 10))
        data2 = np.random.randint(2, 10, size=(10, 10))
        rand = np.random.randint(1, 10)
        arr1 = tensor(data1, chunk_size=3).tosparse()
        arr2 = tensor(data2, chunk_size=3).tosparse()

        check_unary(_sp_unary_ufunc, arr1, data1)
        check_binary(_sp_bin_ufunc, arr1, arr2, data1, data2, rand)

    def testAddWithOutExecution(self):
        """Exercise the ``out=``, ``casting=`` and ``where=`` arguments of
        ``add`` and ``truediv`` and compare with the NumPy equivalents.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        def fresh(data, chunk_size):
            # Wrap a copy of the data in a new tensor.
            return tensor(data.copy(), chunk_size=chunk_size)

        data1 = np.random.random((5, 9, 4))
        data2 = np.random.random((9, 4))

        # out= is the first operand itself
        lhs = fresh(data1, 3)
        rhs = fresh(data2, 3)
        add(lhs, rhs, out=lhs)
        self.assertTrue(np.array_equal(run(lhs), data1 + data2))

        # out= has a different dtype, with casting='unsafe'
        lhs = fresh(data1, 3)
        rhs = fresh(data2, 3)
        summed = add(lhs, rhs, out=lhs.astype('i4'), casting='unsafe')
        np.testing.assert_array_equal(run(summed),
                                      (data1 + data2).astype('i4'))

        # out= combined with where=
        lhs = fresh(data1, 3)
        rhs = fresh(data2, 3)
        divided = truediv(lhs, rhs, out=lhs, where=rhs > .5)
        res = run(divided)
        self.assertTrue(np.array_equal(
            res, np.true_divide(data1, data2, out=data1.copy(), where=data2 > .5)))

        # where= without out=: only the selected positions are compared
        lhs = fresh(data1, 4)
        rhs = fresh(data2, 4)
        res = run(add(lhs, rhs, where=lhs > .5))
        expected = np.add(data1, data2, where=data1 > .5)
        self.assertTrue(np.array_equal(res[data1 > .5], expected[data1 > .5]))

        # same, with a scalar second operand
        lhs = fresh(data1, 4)
        res = run(add(lhs, 1, where=lhs > .5))
        expected = np.add(data1, 1, where=data1 > .5)
        self.assertTrue(np.array_equal(res[data1 > .5], expected[data1 > .5]))

        # input and out= are two slices of the same tensor
        base = fresh(data2, 3)
        res = run(add(base[:5, :], 1, out=base[-5:, :]))
        expected = np.add(data2[:5, :], 1)
        self.assertTrue(np.array_equal(res, expected))

    def testArctan2Execution(self):
        """Check ``arctan2`` values and the sparseness of its result for
        scalar, dense and sparse operand combinations.
        """
        def evaluate(y, expect_sparse):
            # Check the sparse flag first, then execute and return the result.
            verify = self.assertTrue if expect_sparse else self.assertFalse
            verify(y.issparse())
            return self.executor.execute_tensor(y, concat=True)[0]

        x = tensor(1)  # scalar

        result = evaluate(arctan2(x, x), False)
        np.testing.assert_equal(result, np.arctan2(1, 1))

        result = evaluate(arctan2(0, x), False)
        np.testing.assert_equal(result, np.arctan2(0, 1))

        raw1 = np.array([[0, 1, 2]])
        raw2 = sps.csr_matrix([[0, 1, 0]])

        # dense with sparse -> dense result expected
        result = evaluate(arctan2(raw1, raw2), False)
        np.testing.assert_equal(result, np.arctan2(raw1, raw2.A))

        # sparse with sparse -> sparse result expected
        result = evaluate(arctan2(raw2, raw2), True)
        np.testing.assert_equal(result, np.arctan2(raw2.A, raw2.A))

        # zero scalar with sparse -> sparse result expected
        result = evaluate(arctan2(0, raw2), True)
        np.testing.assert_equal(result, np.arctan2(0, raw2.A))

    def testFrexpExecution(self):
        """Check ``frexp`` by summing its two outputs and comparing with the
        sum of ``np.frexp``'s outputs, for dense input, preallocated output
        tensors, and sparse input.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        dense = np.random.random((5, 9, 4))

        # outputs returned by frexp
        mantissa, exponent = frexp(tensor(dense.copy(), chunk_size=3))
        res = run(mantissa + exponent)
        expected = sum(np.frexp(dense))
        self.assertTrue(np.allclose(res, expected))

        # outputs written into tensors passed positionally
        source = tensor(dense.copy(), chunk_size=3)
        mantissa = zeros(dense.shape, chunk_size=3)
        exponent = zeros(dense.shape, dtype='i8', chunk_size=3)
        frexp(source, mantissa, exponent)
        res = run(mantissa + exponent)
        expected = sum(np.frexp(dense))
        self.assertTrue(np.allclose(res, expected))

        # sparse input
        sparse = sps.random(5, 9, density=.1)
        mantissa, exponent = frexp(tensor(sparse.copy(), chunk_size=3))
        res = run(mantissa + exponent)
        expected = sum(np.frexp(sparse.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testFrexpOrderExecution(self):
        """Check that ``frexp(..., order='F')`` matches ``np.frexp`` and that
        both outputs are F-contiguous and not C-contiguous.
        """
        data = np.random.random((5, 9))
        outputs = frexp(tensor(data, chunk_size=3), order='F')
        o1, o2 = outputs

        res1, res2 = self.executor.execute_tileables([o1, o2])
        expected1, expected2 = np.frexp(data, order='F')

        for actual, desired in ((res1, expected1), (res2, expected2)):
            np.testing.assert_allclose(actual, desired)
            self.assertTrue(actual.flags['F_CONTIGUOUS'])
            self.assertFalse(actual.flags['C_CONTIGUOUS'])

    def testModfExecution(self):
        """Check ``modf`` by summing its two outputs and comparing with the
        sum of ``np.modf``'s outputs, for tensor, list and sparse inputs and
        for preallocated output tensors.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        dense = np.random.random((5, 9))

        # tensor input
        part1, part2 = modf(tensor(dense.copy(), chunk_size=3))
        res = run(part1 + part2)
        expected = sum(np.modf(dense))
        self.assertTrue(np.allclose(res, expected))

        # plain list input
        part1, part2 = modf([0, 3.5])
        res = run(part1 + part2)
        expected = sum(np.modf([0, 3.5]))
        self.assertTrue(np.allclose(res, expected))

        # outputs written into tensors passed positionally
        source = tensor(dense.copy(), chunk_size=3)
        part1 = zeros(dense.shape, chunk_size=3)
        part2 = zeros(dense.shape, chunk_size=3)
        modf(source, part1, part2)
        res = run(part1 + part2)
        expected = sum(np.modf(dense))
        self.assertTrue(np.allclose(res, expected))

        # sparse input
        sparse = sps.random(5, 9, density=.1)
        part1, part2 = modf(tensor(sparse.copy(), chunk_size=3))
        res = run(part1 + part2)
        expected = sum(np.modf(sparse.toarray()))
        np.testing.assert_equal(res.toarray(), expected)

    def testModfOrderExecution(self):
        """Check that ``modf(..., order='F')`` matches ``np.modf`` and that
        both outputs are F-contiguous and not C-contiguous.
        """
        data = np.random.random((5, 9))
        outputs = modf(tensor(data, chunk_size=3), order='F')
        o1, o2 = outputs

        res1, res2 = self.executor.execute_tileables([o1, o2])
        expected1, expected2 = np.modf(data, order='F')

        for actual, desired in ((res1, expected1), (res2, expected2)):
            np.testing.assert_allclose(actual, desired)
            self.assertTrue(actual.flags['F_CONTIGUOUS'])
            self.assertFalse(actual.flags['C_CONTIGUOUS'])

    def testClipExecution(self):
        """Compare ``clip`` with ``np.clip`` for scalar, tensor, list and
        sparse bounds, with and without ``out=``.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        a_data = np.arange(10)

        # scalar bounds, new output
        clipped = clip(tensor(a_data.copy(), chunk_size=3), 1, 8)
        res = run(clipped)
        expected = np.clip(a_data, 1, 8)
        self.assertTrue(np.array_equal(res, expected))

        # scalar bounds, written back into the input tensor
        a = tensor(a_data.copy(), chunk_size=3)
        clip(a, 3, 6, out=a)
        res = run(a)
        expected = np.clip(a_data, 3, 6)
        self.assertTrue(np.array_equal(res, expected))

        # tensor bounds, written back into the input tensor
        a = tensor(a_data.copy(), chunk_size=3)
        a_min_data = np.random.randint(1, 10, size=(10,))
        a_max_data = np.random.randint(1, 10, size=(10,))
        clip(a, tensor(a_min_data), tensor(a_max_data), out=a)
        res = run(a)
        expected = np.clip(a_data, a_min_data, a_max_data)
        self.assertTrue(np.array_equal(res, expected))

        with option_context() as options:
            options.chunk_size = 3

            # list lower bound
            lower = [3, 4, 1, 1, 1, 4, 4, 4, 4, 4]
            a = tensor(a_data.copy(), chunk_size=3)
            res = run(clip(a, lower, 8))
            expected = np.clip(a_data, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
            self.assertTrue(np.array_equal(res, expected))

            # test sparse clip
            a_data = sps.csr_matrix([[0, 2, 8], [0, 0, -1]])
            a = tensor(a_data, chunk_size=3)
            b_data = sps.csr_matrix([[0, 3, 0], [1, 0, -2]])

            res = run(clip(a, b_data, 4))
            expected = np.clip(a_data.toarray(), b_data.toarray(), 4)
            self.assertTrue(np.array_equal(res.toarray(), expected))

    def testClipOrderExecution(self):
        """Check that clipping an F-ordered input gives an F-ordered result."""
        raw = np.asfortranarray(np.random.rand(4, 8))
        clipped = clip(tensor(raw, chunk_size=3), 0.2, 0.8)

        actual = self.executor.execute_tensor(clipped, concat=True)[0]
        wanted = np.clip(raw, 0.2, 0.8)
        np.testing.assert_allclose(actual, wanted)

        # The result must be Fortran-contiguous only.
        layout = actual.flags
        self.assertTrue(layout['F_CONTIGUOUS'])
        self.assertFalse(layout['C_CONTIGUOUS'])

    def testAroundExecution(self):
        """Compare ``round(2)`` with ``np.around`` for dense and sparse data."""
        def rounded(raw):
            # Build a tensor from ``raw``, round it to two decimals, execute it.
            t = tensor(raw, chunk_size=3).round(2)
            return self.executor.execute_tensor(t, concat=True)[0]

        dense_raw = np.random.randn(10, 20)
        actual = rounded(dense_raw)
        np.testing.assert_allclose(actual, np.around(dense_raw, decimals=2))

        # sparse input: the result is densified before the comparison
        sparse_raw = sps.random(10, 20, density=.2)
        actual = rounded(sparse_raw)
        wanted = np.around(sparse_raw.toarray(), decimals=2)
        np.testing.assert_allclose(actual.toarray(), wanted)

    def testAroundOrderExecution(self):
        """Check that rounding an F-ordered input gives an F-ordered result."""
        raw = np.asfortranarray(np.random.rand(10, 20))
        rounded = tensor(raw, chunk_size=3).round(2)

        actual = self.executor.execute_tensor(rounded, concat=True)[0]
        wanted = np.around(raw, decimals=2)
        np.testing.assert_allclose(actual, wanted)

        # The result must be Fortran-contiguous only.
        layout = actual.flags
        self.assertTrue(layout['F_CONTIGUOUS'])
        self.assertFalse(layout['C_CONTIGUOUS'])

    def testCosOrderExecution(self):
        """Check the memory layout of ``cos`` with default and explicit order."""
        raw = np.asfortranarray(np.random.rand(3, 5))
        src = tensor(raw, chunk_size=2)

        # No ``order`` argument: the asserts require a Fortran-only layout.
        default_t = cos(src)
        default_out = self.executor.execute_tensor(default_t, concat=True)[0]
        np.testing.assert_allclose(default_out, np.cos(raw))
        self.assertFalse(default_out.flags['C_CONTIGUOUS'])
        self.assertTrue(default_out.flags['F_CONTIGUOUS'])

        # ``order='C'``: the asserts require a C-only layout.
        c_order_t = cos(src, order='C')
        c_order_out = self.executor.execute_tensor(c_order_t, concat=True)[0]
        np.testing.assert_allclose(c_order_out, np.cos(raw, order='C'))
        self.assertTrue(c_order_out.flags['C_CONTIGUOUS'])
        self.assertFalse(c_order_out.flags['F_CONTIGUOUS'])

    def testIsCloseExecution(self):
        """Compare ``isclose`` with ``np.isclose`` on dense, scalar and sparse input."""
        def run(t):
            # Execute ``t`` and return the first item of the concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        # Extra keyword arguments tried for every tensor/tensor comparison.
        nan_modes = ({}, {'equal_nan': True})

        raw_x = np.array([1.05, 1.0, 1.01, np.nan])
        raw_y = np.array([1.04, 1.0, 1.03, np.nan])

        x = tensor(raw_x, chunk_size=2)
        y = tensor(raw_y, chunk_size=3)

        for extra in nan_modes:
            actual = run(isclose(x, y, atol=.01, **extra))
            wanted = np.isclose(raw_x, raw_y, atol=.01, **extra)
            np.testing.assert_equal(actual, wanted)

        # test tensor with scalar
        scalar_cases = (
            ((x, 1.0), (raw_x, 1.0)),
            ((1.0, y), (1.0, raw_y)),
            ((1.0, 2.0), (1.0, 2.0)),
        )
        for tensor_args, numpy_args in scalar_cases:
            actual = run(isclose(*tensor_args, atol=.01))
            wanted = np.isclose(*numpy_args, atol=.01)
            np.testing.assert_equal(actual, wanted)

        # test sparse
        sparse_x = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        sparse_y = sps.csr_matrix(np.array([0, 1.0, 1.03, np.nan]))

        x = tensor(sparse_x, chunk_size=2)
        y = tensor(sparse_y, chunk_size=3)

        for extra in nan_modes:
            actual = run(isclose(x, y, atol=.01, **extra))
            wanted = np.isclose(sparse_x.toarray(), sparse_y.toarray(),
                                atol=.01, **extra)
            np.testing.assert_equal(actual, wanted)

    @ignore_warning
    def testDtypeExecution(self):
        """Check ``truediv`` with an explicit ``dtype`` and division by zero."""
        ones_f4 = ones((10, 20), dtype='f4', chunk_size=5)

        # The requested float64 dtype must show up on the result.
        halved = truediv(ones_f4, 2, dtype='f8')
        out = self.executor.execute_tensor(halved, concat=True)[0]
        self.assertEqual(out.dtype, np.float64)

        # Outside ``np.errstate`` a division by zero is expected to give inf.
        by_zero = truediv(ones_f4, 0, dtype='f8')
        out = self.executor.execute_tensor(by_zero, concat=True)[0]
        self.assertTrue(np.isinf(out[0, 0]))

        # With divide errors set to raise, execution must raise instead.
        with self.assertRaises(FloatingPointError), \
                np.errstate(divide='raise'):
            by_zero = truediv(ones_f4, 0, dtype='f8')
            _ = self.executor.execute_tensor(by_zero, concat=True)[0]  # noqa: F841

    def testSetGetRealExecution(self):
        """Read and assign the ``real`` attribute on dense and sparse tensors."""
        def run(t):
            # Execute ``t`` and return the first item of the concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def with_real(base, value):
            # NumPy reference: a copy of ``base`` whose real part is ``value``.
            ref = base.copy()
            ref.real = value
            return ref

        # dense complex vector
        raw = np.array([1+2j, 3+4j, 5+6j])
        a = tensor(raw, chunk_size=2)

        np.testing.assert_equal(run(a.real), raw.real)

        a.real = 9
        np.testing.assert_equal(run(a), with_real(raw, 9))

        a.real = np.array([9, 8, 7])
        np.testing.assert_equal(run(a), with_real(raw, np.array([9, 8, 7])))

        # test sparse
        raw = np.array([[1+2j, 3+4j, 0], [0, 0, 0]])
        a = tensor(sps.csr_matrix(raw))

        np.testing.assert_equal(run(a.real).toarray(), raw.real)

        a.real = 9
        np.testing.assert_equal(run(a).toarray(), with_real(raw, 9))

        a.real = np.array([9, 8, 7])
        np.testing.assert_equal(run(a).toarray(),
                                with_real(raw, np.array([9, 8, 7])))

    def testSetGetImagExecution(self):
        """Read and assign the ``imag`` attribute on dense and sparse tensors."""
        def exercise(raw, t, to_dense):
            # Run the read / scalar-assign / array-assign scenario on ``t``;
            # ``to_dense`` converts an executed result before the comparison.
            def run(x):
                return to_dense(
                    self.executor.execute_tensor(x, concat=True)[0])

            np.testing.assert_equal(run(t.imag), raw.imag)

            # Factories give the tensor and the NumPy reference separate values.
            for make_value in (lambda: 9, lambda: np.array([9, 8, 7])):
                t.imag = make_value()

                actual = run(t)
                wanted = raw.copy()
                wanted.imag = make_value()

                np.testing.assert_equal(actual, wanted)

        dense_raw = np.array([1+2j, 3+4j, 5+6j])
        exercise(dense_raw, tensor(dense_raw, chunk_size=2), lambda r: r)

        # test sparse
        sparse_raw = np.array([[1+2j, 3+4j, 0], [0, 0, 0]])
        exercise(sparse_raw, tensor(sps.csr_matrix(sparse_raw)),
                 lambda r: r.toarray())

    @require_cupy
    def testCupyExecution(self):
        """Run a binary and a unary op on ``gpu=True`` tensors and compare to NumPy."""
        raw_a = np.random.rand(10, 10)
        raw_b = np.random.rand(10, 10)

        a, b = (tensor(raw, gpu=True, chunk_size=3) for raw in (raw_a, raw_b))

        # Results are fetched with ``.get()`` before being compared to NumPy.
        summed = self.executor.execute_tensor((a + b), concat=True)[0]
        np.testing.assert_array_equal(summed.get(), (raw_a + raw_b))

        cosine = self.executor.execute_tensor(cos(a), concat=True)[0]
        np.testing.assert_array_almost_equal(cosine.get(), np.cos(raw_a))
Ejemplo n.º 3
0
    def testTensordotExecution(self):
        """Compare ``tensordot``, ``dot`` and ``inner`` results with NumPy."""
        mock_executor = ExecutorForTest(
            sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
        run = self.executor.execute_tensor

        # tensordot over two axes, compared chunk by chunk
        raw_a = np.arange(60).reshape(3, 4, 5)
        a = tensor(raw_a, chunk_size=2)
        raw_b = np.arange(24).reshape(4, 3, 2)
        b = tensor(raw_b, chunk_size=2)

        contract_axes = ([1, 0], [0, 1])
        c = tensordot(a, b, axes=contract_axes)
        # Mock execution: items [0] and [1] of every entry must each sum to
        # the tensor's ``nbytes``.
        sizes = mock_executor.execute_tensor(c, mock=True)
        self.assertEqual(sum(entry[0] for entry in sizes), c.nbytes)
        self.assertEqual(sum(entry[1] for entry in sizes), c.nbytes)

        chunks = run(c)
        wanted = np.tensordot(raw_a, raw_b, axes=contract_axes)
        row_ranges = (slice(None, 2), slice(2, 4), slice(4, None))
        for idx, rows in enumerate(row_ranges):
            self.assertTrue(np.array_equal(chunks[idx], wanted[rows, :]))

        # 2-d dot whose result comes back as two row blocks
        a = ones((1000, 2000), chunk_size=500)
        b = ones((2000, 100), chunk_size=500)
        chunks = run(dot(a, b))
        wanted = np.dot(np.ones((1000, 2000)), np.ones((2000, 100)))
        self.assertEqual(len(chunks), 2)
        self.assertTrue(np.array_equal(chunks[0], wanted[:500, :]))
        self.assertTrue(np.array_equal(chunks[1], wanted[500:, :]))

        # method form of dot: 25 result chunks, each a 2 x 2 block of eights
        a = ones((10, 8), chunk_size=2)
        b = ones((8, 10), chunk_size=2)
        chunks = run(a.dot(b))
        self.assertEqual(len(chunks), 25)
        for chunk in chunks:
            self.assertTrue(np.array_equal(chunk, np.tile([8], [2, 2])))

        # dot with chunk_size equal to the largest dimension
        a = ones((500, 500), chunk_size=500)
        b = ones((500, 100), chunk_size=500)
        chunks = run(a.dot(b))
        self.assertTrue(
            np.array_equal(chunks[0], np.tile([500], [500, 100])))

        # random 3-d operands with different chunk sizes
        raw_a = np.random.random((100, 200, 50))
        raw_b = np.random.random((200, 10, 100))
        a = tensor(raw_a, chunk_size=50)
        b = tensor(raw_b, chunk_size=33)
        c = tensordot(a, b, axes=((0, 1), (2, 0)))
        merged = run(c, concat=True)
        wanted = np.tensordot(raw_a, raw_b, axes=(c.op.a_axes, c.op.b_axes))
        self.assertTrue(np.allclose(merged[0], wanted))

        # inner product, again two row blocks
        a = ones((1000, 2000), chunk_size=500)
        b = ones((100, 2000), chunk_size=500)
        chunks = run(inner(a, b))
        wanted = np.inner(np.ones((1000, 2000)), np.ones((100, 2000)))
        self.assertEqual(len(chunks), 2)
        self.assertTrue(np.array_equal(chunks[0], wanted[:500, :]))
        self.assertTrue(np.array_equal(chunks[1], wanted[500:, :]))

        # dot with a chunk size that does not divide the shape evenly
        a = ones((100, 100), chunk_size=30)
        b = ones((100, 100), chunk_size=30)
        merged = run(a.dot(b), concat=True)[0]
        np.testing.assert_array_equal(merged, np.ones((100, 100)) * 100)
Ejemplo n.º 4
0
class Test(TestBase):
    def setUp(self):
        """Run the base-class setup, then create the executor used by the tests."""
        super().setUp()
        backend = 'numpy'
        self.executor = ExecutorForTest(backend)

    def testCreateSparseExecution(self):
        """Check sparse creation via ``tensor``, ``ones_like`` and ``tosparse``."""
        run = self.executor.execute_tensor
        # Column ranges that the first and second result chunks are compared to.
        left, right = slice(None, 2), slice(2, None)

        def check(chunks, reference, dtype):
            # First chunk: type and dtype; both chunks: values vs. ``reference``.
            self.assertIsInstance(chunks[0], SparseNDArray)
            self.assertEqual(chunks[0].dtype, dtype)
            np.testing.assert_array_equal(chunks[0].toarray(),
                                          reference[..., left].toarray())
            np.testing.assert_array_equal(chunks[1].toarray(),
                                          reference[..., right].toarray())

        mat = sps.csr_matrix([[0, 0, 2], [2, 0, 0]])
        t = tensor(mat, dtype='f8', chunk_size=2)
        check(run(t), mat, np.float64)

        # ones_like on a sparse tensor is expected to keep the sparsity pattern
        t2 = ones_like(t, dtype='f4')
        chunks = run(t2)
        ones_ref = sps.csr_matrix([[0, 0, 1], [1, 0, 0]])
        check(chunks, ones_ref, np.float32)

        # dense tensor converted with ``tosparse``
        t3 = tensor(np.array([[0, 0, 2], [2, 0, 0]]), chunk_size=2).tosparse()
        check(run(t3), mat, np.int_)

    def testZerosExecution(self):
        """Check ``zeros``/``zeros_like`` values, dtype, sparsity and memory order."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        dense = zeros((20, 30), dtype='i8', chunk_size=5)

        out = run(dense)
        np.testing.assert_array_equal(out[0], np.zeros((20, 30), dtype='i8'))
        self.assertEqual(out[0].dtype, np.int64)

        out = run(zeros_like(dense))
        np.testing.assert_array_equal(out[0], np.zeros((20, 30), dtype='i8'))
        self.assertEqual(out[0].dtype, np.int64)

        # sparse zeros must not store any element
        sparse = zeros((20, 30), dtype='i4', chunk_size=5, sparse=True)
        out = run(sparse)

        self.assertEqual(out[0].nnz, 0)

        # order='F': contiguity flags must agree with NumPy's
        f_ordered = zeros((20, 30), dtype='i8', chunk_size=6, order='F')
        actual = run(f_ordered)[0]
        wanted = np.zeros((20, 30), dtype='i8', order='F')
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(actual.flags[flag], wanted.flags[flag])

    def testEmptyExecution(self):
        """Check shape, dtype and memory order of ``empty``/``empty_like``."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        def check(t, dtype):
            # Only shape and dtype are compared; the values are not inspected.
            out = run(t)
            self.assertEqual(out[0].shape, (20, 30))
            self.assertEqual(out[0].dtype, dtype)

        check(empty((20, 30), dtype='i8', chunk_size=5), np.int64)

        # no dtype given: the result is expected to be float64
        plain = empty((20, 30), chunk_size=5)
        check(plain, np.float64)

        check(empty_like(plain), np.float64)

        # order='F': contiguity flags must agree with NumPy's
        f_ordered = empty((20, 30), dtype='i8', chunk_size=5, order='F')

        actual = run(f_ordered)[0]
        wanted = np.empty((20, 30), dtype='i8', order='F')
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(actual.flags[flag], wanted.flags[flag])

    def testFullExecution(self):
        """Check ``full``/``full_like`` values and memory order against NumPy."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)

        def assert_same_layout(actual, wanted):
            # Both contiguity flags must agree with the NumPy reference.
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(actual.flags[flag], wanted.flags[flag])

        # scalar fill value
        t = full((2, 2), 1, dtype='f4', chunk_size=1)
        np.testing.assert_array_equal(run(t)[0],
                                      np.full((2, 2), 1, dtype='f4'))

        # list fill value
        t = full((2, 2), [1, 2], dtype='f8', chunk_size=1)
        np.testing.assert_array_equal(run(t)[0],
                                      np.full((2, 2), [1, 2], dtype='f8'))

        # order='F': only the layout is compared
        t = full((2, 2), 1, dtype='f4', chunk_size=1, order='F')
        actual = run(t)[0]
        assert_same_layout(actual, np.full((2, 2), 1, dtype='f4', order='F'))

        # full_like on the F-ordered tensor: values and layout
        t2 = full_like(t, 10, order='F')
        actual = run(t2)[0]
        wanted = np.full((2, 2), 10, dtype='f4', order='F')
        np.testing.assert_array_equal(actual, wanted)
        assert_same_layout(actual, wanted)

    def testArangeExecution(self):
        """Compare ``arange`` with ``np.arange`` for int, float and date ranges."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        # integer step: exact comparison
        t = arange(1, 20, 3, chunk_size=2)
        self.assertTrue(np.array_equal(run(t), np.arange(1, 20, 3)))

        # fractional steps: compared with ``allclose``
        for start, stop, step in ((1, 20, .3), (1.0, 1.8, .3)):
            t = arange(start, stop, step, chunk_size=4)

            actual = run(t)
            wanted = np.arange(start, stop, step)
            self.assertTrue(np.allclose(actual, wanted))

        # datetime64 range given as date strings
        first_day, last_day = '1066-10-13', '1066-10-31'
        t = arange(first_day, last_day, dtype=np.datetime64, chunk_size=3)

        actual = run(t)
        wanted = np.arange(first_day, last_day, dtype=np.datetime64)
        self.assertTrue(np.array_equal(actual, wanted))

    def testDiagExecution(self):
        """Compare ``diag`` with ``np.diag`` for 2-d, sparse and 1-d inputs."""
        # (k keyword arguments, run with ``concat=True``?) per case.  The
        # first four offsets are shared by every 2-d section.
        common = [({}, True), ({'k': 1}, True), ({'k': 3}, True),
                  ({'k': -2}, True)]
        # The last offset of each 2-d section is executed without ``concat``.
        square_cases = common + [({'k': -5}, False)]
        wide_cases = common + [({'k': -3}, False)]

        def evaluate(operand, reference, k_kw, concat=True, **diag_kw):
            # Return ``(result, expected)`` for one ``diag`` call.  ``k_kw``
            # goes to both ``diag`` and ``np.diag``; ``diag_kw`` only to
            # ``diag``.
            d = diag(operand, **k_kw, **diag_kw)
            exec_kw = {'concat': True} if concat else {}
            res = self.executor.execute_tensor(d, **exec_kw)[0]
            return res, np.diag(reference, **k_kw)

        def check_dense(operand, reference, cases):
            for k_kw, concat in cases:
                res, expected = evaluate(operand, reference, k_kw, concat)
                np.testing.assert_equal(res, expected)

        def check_sparse(operand, reference, cases):
            # Same as ``check_dense`` but the result is densified first.
            for k_kw, concat in cases:
                res, expected = evaluate(operand, reference, k_kw, concat)
                np.testing.assert_equal(res.toarray(), expected)

        # 2-d  6 * 6
        a = arange(36, chunk_size=2).reshape(6, 6)
        check_dense(a, np.arange(36).reshape(6, 6), square_cases)

        # 2-d  6 * 6 sparse, no tensor
        a = sps.rand(6, 6, density=.1)
        check_sparse(a, a.toarray(), square_cases)

        # 2-d  6 * 6 sparse, from tensor
        raw_a = sps.rand(6, 6, density=.1)
        a = tensor(raw_a, chunk_size=2)
        check_sparse(a, raw_a.toarray(), square_cases)

        # 2-d  4 * 9
        a = arange(36, chunk_size=2).reshape(4, 9)
        check_dense(a, np.arange(36).reshape(4, 9), wide_cases)

        # 2-d  4 * 9 sparse, no tensor
        a = sps.rand(4, 9, density=.1)
        check_sparse(a, a.toarray(), wide_cases)

        # 2-d  4 * 9 sparse, from tensor
        raw_a = sps.rand(4, 9, density=.1)
        a = tensor(raw_a, chunk_size=2)
        check_sparse(a, raw_a.toarray(), wide_cases)

        # 1-d
        a = arange(5, chunk_size=2)
        vector = np.arange(5)

        # main diagonal: values plus a C-only memory layout
        res, expected = evaluate(a, vector, {})
        np.testing.assert_equal(res, expected)
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])

        for offset in (1, 3, -2, -3):
            res, expected = evaluate(a, vector, {'k': offset})
            np.testing.assert_equal(res, expected)

        # 1-d with ``sparse=True``: the result must be a SparseNDArray
        for k_kw in ({}, {'k': 1}, {'k': 2}, {'k': -2}, {'k': -3}):
            res, expected = evaluate(a, vector, k_kw, sparse=True)
            self.assertIsInstance(res, SparseNDArray)
            np.testing.assert_equal(res.toarray(), expected)

    def testDiagflatExecution(self):
        """Compare ``diagflat`` with ``np.diagflat`` for list and tensor input."""
        def check(t, wanted):
            actual = self.executor.execute_tensor(t, concat=True)[0]
            np.testing.assert_equal(actual, wanted)

        # nested list input
        check(diagflat([[1, 2], [3, 4]], chunk_size=1),
              np.diagflat([[1, 2], [3, 4]]))

        # the same values supplied as a tensor
        nested = tensor([[1, 2], [3, 4]], chunk_size=1)
        check(diagflat(nested), np.diagflat([[1, 2], [3, 4]]))

        # flat list with a diagonal offset of 1
        check(diagflat([1, 2], 1, chunk_size=1), np.diagflat([1, 2], 1))

        # single-row tensor with a diagonal offset of 1
        row = tensor([[1, 2]], chunk_size=1)
        check(diagflat(row, 1), np.diagflat([1, 2], 1))

    def testEyeExecution(self):
        """Compare ``eye`` with ``np.eye`` for dense and sparse outputs."""
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        # Keyword combinations passed to both ``eye`` and ``np.eye``.
        variants = (
            {},
            {'k': 1},
            {'k': 2},
            {'k': -1},
            {'k': -3},
            {'M': 3, 'k': 1},
            {'M': 3, 'k': -3},
            {'M': 7, 'k': 1},
            {'M': 8, 'k': -3},
        )

        for kw in variants:
            t = eye(5, chunk_size=2, **kw)

            actual = run(t)
            wanted = np.eye(5, **kw)
            np.testing.assert_equal(actual, wanted)

        # explicit integer dtype
        t = eye(2, dtype=int)

        actual = run(t)
        self.assertEqual(actual.dtype, np.int_)

        # test sparse
        for kw in variants:
            t = eye(5, sparse=True, chunk_size=2, **kw)

            actual = run(t)
            wanted = np.eye(5, **kw)
            self.assertIsInstance(actual, SparseNDArray)
            np.testing.assert_equal(actual.toarray(), wanted)

        # NOTE(review): ``order='F'`` is requested here, yet the asserts
        # expect a C-only layout -- confirm this is the intended behaviour.
        t = eye(5, M=9, k=-3, chunk_size=2, order='F')

        actual = run(t)
        self.assertTrue(actual.flags['C_CONTIGUOUS'])
        self.assertFalse(actual.flags['F_CONTIGUOUS'])

    def testLinspaceExecution(self):
        """Run chunked ``linspace`` and compare it with ``np.linspace``."""
        start, stop, count = 2.0, 9.0, 11

        # Default call: the endpoint is part of the result.
        t = linspace(start, stop, num=count, chunk_size=3)
        got = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(got, np.linspace(start, stop, num=count))

        # Same range with the endpoint left out.
        t = linspace(start, stop, num=count, endpoint=False, chunk_size=3)
        got = self.executor.execute_tensor(t, concat=True)[0]
        np.testing.assert_allclose(
            got, np.linspace(start, stop, num=count, endpoint=False))

        # For the integer variant only the resulting dtype is checked.
        t = linspace(start, stop, num=count, chunk_size=3, dtype=int)
        got = self.executor.execute_tensor(t, concat=True)[0]
        self.assertEqual(got.dtype, np.int_)

    def testMeshgridExecution(self):
        """Compare ``meshgrid`` with ``np.meshgrid`` for each combination of
        the ``indexing`` and ``sparse`` options exercised by this test."""
        a = arange(5, chunk_size=2)
        b = arange(6, 12, chunk_size=3)
        c = arange(12, 19, chunk_size=4)
        np_inputs = (np.arange(5), np.arange(6, 12), np.arange(12, 19))

        # An empty dict means "call with the defaults".
        option_sets = (
            {},
            {'indexing': 'ij'},
            {'sparse': True},
            {'indexing': 'ij', 'sparse': True},
        )

        for options in option_sets:
            A, B, C = meshgrid(a, b, c, **options)
            wanted = np.meshgrid(*np_inputs, **options)

            # Execute every returned grid separately, in output order.
            for position, grid in enumerate((A, B, C)):
                got = self.executor.execute_tensor(grid, concat=True)[0]
                np.testing.assert_equal(got, wanted[position])

    def testIndicesExecution(self):
        """Check ``indices`` as a whole and per leading-axis slice against
        ``np.indices``."""
        grid = indices((2, 3), chunk_size=1)
        reference = np.indices((2, 3))

        whole = self.executor.execute_tensor(grid, concat=True)[0]
        np.testing.assert_equal(whole, reference)

        # Each slice along the first axis is executed on its own.
        for axis in (0, 1):
            part = self.executor.execute_tensor(grid[axis], concat=True)[0]
            np.testing.assert_equal(part, reference[axis])

    def testTriuExecution(self):
        """Check ``triu`` against ``np.triu`` over several diagonal offsets.

        Three kinds of input are covered: a dense 3-d tensor, a sparse 2-d
        tensor and a Fortran-ordered dense tensor. Sparse results are
        densified with ``toarray()`` before the comparison so that two plain
        ndarrays are compared.
        """
        # ``None`` means "call without k", i.e. rely on the default offset.
        offsets = (None, 1, 2, -1, -2)

        raw = np.arange(24).reshape(2, 3, 4)
        a = arange(24, chunk_size=2).reshape(2, 3, 4)
        for k in offsets:
            kwargs = {} if k is None else {'k': k}
            t = triu(a, **kwargs)

            res = self.executor.execute_tensor(t, concat=True)[0]
            np.testing.assert_equal(res, np.triu(raw, **kwargs))

        # test sparse
        raw = np.arange(12).reshape(3, 4)
        a = arange(12, chunk_size=2).reshape(3, 4).tosparse()
        for k in offsets:
            kwargs = {} if k is None else {'k': k}
            t = triu(a, **kwargs)

            res = self.executor.execute_tensor(t, concat=True)[0]
            self.assertIsInstance(res, SparseNDArray)
            # Compare dense against dense instead of handing the sparse
            # container to NumPy's comparison helper.
            np.testing.assert_equal(res.toarray(), np.triu(raw, **kwargs))

        # Fortran-ordered input: the result must report the same memory
        # layout flags as NumPy's result.
        raw = np.asfortranarray(np.random.rand(10, 7))
        a = tensor(raw, chunk_size=3)

        t = triu(a, k=-2)

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.triu(raw, k=-2)
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

    def testTrilExecution(self):
        """Check ``tril`` against ``np.tril`` over several diagonal offsets.

        A dense 3-d tensor and a sparse 2-d tensor are covered. Sparse
        results are densified with ``toarray()`` before the comparison so
        that two plain ndarrays are compared.
        """
        # ``None`` means "call without k", i.e. rely on the default offset.
        offsets = (None, 1, 2, -1, -2)

        raw = np.arange(24).reshape(2, 3, 4)
        a = arange(24, chunk_size=2).reshape(2, 3, 4)
        for k in offsets:
            kwargs = {} if k is None else {'k': k}
            t = tril(a, **kwargs)

            res = self.executor.execute_tensor(t, concat=True)[0]
            np.testing.assert_equal(res, np.tril(raw, **kwargs))

        # test sparse
        raw = np.arange(12).reshape(3, 4)
        a = arange(12, chunk_size=2).reshape(3, 4).tosparse()
        for k in offsets:
            kwargs = {} if k is None else {'k': k}
            t = tril(a, **kwargs)

            res = self.executor.execute_tensor(t, concat=True)[0]
            self.assertIsInstance(res, SparseNDArray)
            # Compare dense against dense instead of handing the sparse
            # container to NumPy's comparison helper.
            np.testing.assert_equal(res.toarray(), np.tril(raw, **kwargs))

    def testIndexTrickExecution(self):
        """Check ``nd_grid`` indexing against NumPy's ``mgrid``/``ogrid``.

        ``np.mgrid`` and ``np.ogrid`` are NumPy's public dense and sparse
        grid objects, so they are used as the reference instead of reaching
        into ``np.lib.index_tricks``.
        """
        mgrid = nd_grid()
        t = mgrid[0:5, 0:5]

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.mgrid[0:5, 0:5]
        np.testing.assert_equal(res, expected)

        # A complex step asks for that many points, endpoint included.
        t = mgrid[-1:1:5j]

        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.mgrid[-1:1:5j]
        np.testing.assert_equal(res, expected)

        ogrid = nd_grid(sparse=True)

        t = ogrid[0:5, 0:5]

        res = [self.executor.execute_tensor(o, concat=True)[0] for o in t]
        expected = np.ogrid[0:5, 0:5]
        # zip() stops at the shorter input, so pin the lengths first;
        # otherwise a missing grid would go unnoticed.
        self.assertEqual(len(res), len(expected))
        for r, e in zip(res, expected):
            np.testing.assert_equal(r, e)

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testReadTileDBExecution(self):
        """Write arrays with TileDB, read them back through ``fromtiledb``
        and compare with the data that was written.

        Covers a dense 3-d array, a 2-d sparse array, a 1-d sparse array and
        a dense array created with ``cell_order='F'``. Every case works in
        its own temporary directory, which is removed afterwards.
        """
        ctx = tiledb.Ctx()

        def int_dim(low, high, tile):
            # One int32 dimension spanning the inclusive domain [low, high].
            return tiledb.Dim(ctx=ctx, domain=(low, high), tile=tile,
                              dtype=np.int32)

        def float_schema(domain, **extra):
            # Schema with a single float64 attribute on the given domain.
            return tiledb.ArraySchema(
                ctx=ctx,
                domain=domain,
                attrs=[tiledb.Attr(ctx=ctx, dtype=np.float64)],
                **extra)

        def load(uri):
            # Read the stored array back as one concatenated result.
            return self.executor.execute_tensor(
                fromtiledb(uri, ctx=ctx), concat=True)[0]

        uri = tempfile.mkdtemp()
        try:
            # create TileDB dense array
            domain = tiledb.Domain(
                int_dim(1, 100, 30),
                int_dim(0, 90, 22),
                int_dim(0, 9, 8),
                ctx=ctx,
            )
            tiledb.DenseArray.create(uri, float_schema(domain, sparse=False))

            expected = np.random.rand(100, 91, 10)
            with tiledb.DenseArray(uri=uri, ctx=ctx, mode='w') as writer:
                writer.write_direct(expected)

            result = load(uri)

            np.testing.assert_allclose(expected, result)
        finally:
            shutil.rmtree(uri)

        uri = tempfile.mkdtemp()
        try:
            # create 2-d TileDB sparse array
            domain = tiledb.Domain(
                int_dim(0, 99, 30),
                int_dim(2, 11, 8),
                ctx=ctx,
            )
            tiledb.SparseArray.create(uri, float_schema(domain, sparse=True))

            expected = sps.rand(100, 10, density=0.01)
            with tiledb.SparseArray(uri=uri, ctx=ctx, mode='w') as writer:
                # The second dimension's domain starts at 2, hence the shift.
                rows, cols = expected.row, expected.col + 2
                writer[rows, cols] = {writer.attr(0).name: expected.data}

            result = load(uri)

            np.testing.assert_allclose(expected.toarray(), result.toarray())
        finally:
            shutil.rmtree(uri)

        uri = tempfile.mkdtemp()
        try:
            # create 1-d TileDB sparse array
            domain = tiledb.Domain(
                int_dim(1, 100, 30),
                ctx=ctx,
            )
            tiledb.SparseArray.create(uri, float_schema(domain, sparse=True))

            expected = sps.rand(1, 100, density=0.05)
            with tiledb.SparseArray(uri=uri, ctx=ctx, mode='w') as writer:
                # The domain starts at 1, hence the shift.
                coords = expected.col + 1
                writer[coords] = expected.data

            result = load(uri)

            np.testing.assert_allclose(expected.toarray()[0], result.toarray())
        finally:
            shutil.rmtree(uri)

        uri = tempfile.mkdtemp()
        try:
            # create TileDB dense array with column-major
            domain = tiledb.Domain(
                int_dim(1, 100, 30),
                int_dim(0, 90, 22),
                int_dim(0, 9, 8),
                ctx=ctx,
            )
            tiledb.DenseArray.create(
                uri, float_schema(domain, sparse=False, cell_order='F'))

            expected = np.asfortranarray(np.random.rand(100, 91, 10))
            with tiledb.DenseArray(uri=uri, ctx=ctx, mode='w') as writer:
                writer.write_direct(expected)

            result = load(uri)

            np.testing.assert_allclose(expected, result)
            self.assertTrue(result.flags['F_CONTIGUOUS'])
            self.assertFalse(result.flags['C_CONTIGUOUS'])
        finally:
            shutil.rmtree(uri)

    def testFromDataFrameExecution(self):
        """Convert DataFrames and Series to tensors and check the values,
        the up-cast dtype and the memory layout of the results."""
        frame = md.DataFrame(
            {'angle': [0, 3, 4], 'degree': [360, 180, 360]},
            index=['circle', 'triangle', 'rectangle'])
        got = self.executor.execute_tensor(from_dataframe(frame))
        want = self.executor.execute_tensor(
            mt.tensor([[0, 360], [3, 180], [4, 360]]))
        np.testing.assert_equal(got, want)

        # test up-casting
        mixed = md.DataFrame({'a': [0.1, 0.2, 0.3], 'b': [1, 2, 3]})
        got_mixed = self.executor.execute_tensor(from_dataframe(mixed))
        np.testing.assert_equal(got_mixed[0].dtype, np.dtype('float64'))
        want_mixed = self.executor.execute_tensor(
            mt.tensor([[0.1, 1.0], [0.2, 2.0], [0.3, 3.0]]))
        np.testing.assert_equal(got_mixed, want_mixed)

        # The concatenated result is expected to be Fortran-contiguous.
        rows = [[0.1, 0.2, 0.4], [0.4, 0.7, 0.3]]
        chunked = md.DataFrame(rows, columns=list('abc'), chunk_size=2)
        got_chunked = self.executor.execute_tensor(
            from_dataframe(chunked), concat=True)[0]
        np.testing.assert_array_equal(got_chunked, np.asarray(rows))
        self.assertTrue(got_chunked.flags['F_CONTIGUOUS'])
        self.assertFalse(got_chunked.flags['C_CONTIGUOUS'])

        # test from series
        got_series = md.Series([1, 2, 3]).to_tensor().execute()
        np.testing.assert_array_equal(got_series, np.array([1, 2, 3]))

        got_series = md.Series(range(10), chunk_size=3).to_tensor().execute()
        np.testing.assert_array_equal(got_series, np.arange(10))

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testReadHDF5Execution(self):
        """Read an HDF5 dataset through ``fromhdf5`` given a filename, an
        open file or a dataset object, and check the argument validation."""
        group_name = 'test_group'
        dataset_name = 'test_dataset'
        test_array = np.random.RandomState(0).rand(20, 10)

        def read_back(t):
            # Execute the tensor and return the concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        # An unsupported source type is rejected.
        with self.assertRaises(TypeError):
            fromhdf5(object())

        with tempfile.TemporaryDirectory() as workdir:
            filename = os.path.join(
                workdir, 'test_read_{}.hdf5'.format(int(time.time())))
            with h5py.File(filename, 'w') as out:
                grp = out.create_group(group_name)
                grp.create_dataset(dataset_name, chunks=(7, 4),
                                   data=test_array)

            # test filename
            r = fromhdf5(filename, group=group_name, dataset=dataset_name)

            np.testing.assert_array_equal(read_back(r), test_array)
            self.assertEqual(r.extra_params['raw_chunk_size'], (7, 4))

            with self.assertRaises(ValueError):
                fromhdf5(filename)

            with self.assertRaises(ValueError):
                fromhdf5(filename, dataset='non_exist')

            with h5py.File(filename, 'r') as handle:
                # test file
                r = fromhdf5(handle, group=group_name, dataset=dataset_name)

                np.testing.assert_array_equal(read_back(r), test_array)

                with self.assertRaises(ValueError):
                    fromhdf5(handle)

                with self.assertRaises(ValueError):
                    fromhdf5(handle, dataset='non_exist')

                # test dataset
                ds = handle['{}/{}'.format(group_name, dataset_name)]
                r = fromhdf5(ds)

                np.testing.assert_array_equal(read_back(r), test_array)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testReadZarrExecution(self):
        """Read a zarr array through ``fromzarr`` given an array object, a
        re-opened array, a path with group/dataset, or a full path."""
        group_name = 'test_group'
        dataset_name = 'test_dataset'
        test_array = np.random.RandomState(0).rand(20, 10)

        def check(t):
            # The values must round-trip and the tiled tensor must consist
            # of more than one chunk.
            got = self.executor.execute_tensor(t, concat=True)[0]
            np.testing.assert_array_equal(got, test_array)
            self.assertGreater(len(get_tiled(t).chunks), 1)

        # An unsupported source type is rejected.
        with self.assertRaises(TypeError):
            fromzarr(object())

        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(
                workdir, 'test_read_{}.zarr'.format(int(time.time())))

            group = zarr.group(path)
            arr = group.array(group_name + '/' + dataset_name,
                              test_array,
                              chunks=(7, 4))

            # Source: the array object returned when writing.
            check(fromzarr(arr))

            # Source: the same array re-opened from its path.
            arr = zarr.open_array(
                '{}/{}/{}'.format(path, group_name, dataset_name))
            check(fromzarr(arr))

            # Source: store path plus explicit group and dataset names.
            check(fromzarr(path, group=group_name, dataset=dataset_name))

            # Source: one path that already includes group and dataset.
            check(fromzarr(path + '/' + group_name + '/' + dataset_name))
# Example no. 5
# 0
class Test(unittest.TestCase):
    """Execution tests for concatenate, the ``*stack`` family and
    ``union1d``; every result is compared with the NumPy equivalent.

    Array comparisons go through ``np.testing`` so that a failure reports
    the mismatching values instead of a bare ``False is not true``.
    """

    def setUp(self):
        """Create the executor used by every test."""
        self.executor = ExecutorForTest('numpy')

    def testConcatenateExecution(self):
        """Concatenate dense and sparse inputs along the last axis."""
        a_data = np.random.rand(10, 20, 30)
        b_data = np.random.rand(10, 20, 40)
        c_data = np.random.rand(10, 20, 50)

        a = tensor(a_data, chunk_size=5)
        b = tensor(b_data, chunk_size=6)
        c = tensor(c_data, chunk_size=7)

        d = concatenate([a, b, c], axis=-1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        expected = np.concatenate([a_data, b_data, c_data], axis=-1)
        np.testing.assert_array_equal(res, expected)

        a_data = sps.random(10, 30)
        b_data = sps.rand(10, 40)
        c_data = sps.rand(10, 50)

        a = tensor(a_data, chunk_size=5)
        b = tensor(b_data, chunk_size=6)
        c = tensor(c_data, chunk_size=7)

        d = concatenate([a, b, c], axis=-1)
        res = self.executor.execute_tensor(d, concat=True)[0]
        # toarray() is the explicit way to densify a SciPy sparse matrix;
        # it replaces the terse ``.A`` shorthand.
        expected = np.concatenate(
            [a_data.toarray(), b_data.toarray(), c_data.toarray()], axis=-1)
        np.testing.assert_array_equal(res.toarray(), expected)

    def testStackExecution(self):
        """Stack along several axes, then check that the memory layout of
        the result matches NumPy's for Fortran-ordered inputs and ``out``."""
        raw = [np.random.randn(3, 4) for _ in range(10)]
        arrs = [tensor(a, chunk_size=3) for a in raw]

        arr2 = stack(arrs)
        res = self.executor.execute_tensor(arr2, concat=True)
        np.testing.assert_array_equal(res[0], np.stack(raw))

        arr3 = stack(arrs, axis=1)
        res = self.executor.execute_tensor(arr3, concat=True)
        np.testing.assert_array_equal(res[0], np.stack(raw, axis=1))

        arr4 = stack(arrs, axis=2)
        res = self.executor.execute_tensor(arr4, concat=True)
        np.testing.assert_array_equal(res[0], np.stack(raw, axis=2))

        raw2 = [np.asfortranarray(np.random.randn(3, 4)) for _ in range(10)]
        arr5 = [tensor(a, chunk_size=3) for a in raw2]

        arr6 = stack(arr5)
        res = self.executor.execute_tensor(arr6, concat=True)[0]
        expected = np.stack(raw2).copy('A')
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

        arr7 = stack(arr5, out=empty((10, 3, 4), order='F'))
        res = self.executor.execute_tensor(arr7, concat=True)[0]
        expected = np.stack(raw2, out=np.empty((10, 3, 4),
                                               order='F')).copy('A')
        np.testing.assert_array_equal(res, expected)
        self.assertEqual(res.flags['C_CONTIGUOUS'],
                         expected.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'],
                         expected.flags['F_CONTIGUOUS'])

    def testHStackExecution(self):
        """``hstack`` on 1-d and 2-d inputs."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(20)

        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)

        c = hstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.hstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(10, 5)

        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)

        c = hstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.hstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

    def testVStackExecution(self):
        """``vstack`` on 1-d and 2-d inputs."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(10)

        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)

        c = vstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.vstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(5, 20)

        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)

        c = vstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.vstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

    def testDStackExecution(self):
        """``dstack`` on 1-d and 2-d inputs."""
        a_data = np.random.rand(10)
        b_data = np.random.rand(10)

        a = tensor(a_data, chunk_size=4)
        b = tensor(b_data, chunk_size=4)

        c = dstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.dstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

        a_data = np.random.rand(10, 20)
        b_data = np.random.rand(10, 20)

        a = tensor(a_data, chunk_size=3)
        b = tensor(b_data, chunk_size=4)

        c = dstack([a, b])
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.dstack([a_data, b_data])
        np.testing.assert_array_equal(res, expected)

    def testColumnStackExecution(self):
        """``column_stack`` on 1-d and 3-d inputs."""
        a_data = np.array((1, 2, 3))
        b_data = np.array((2, 3, 4))
        a = tensor(a_data, chunk_size=1)
        b = tensor(b_data, chunk_size=2)

        c = column_stack((a, b))
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.column_stack((a_data, b_data))
        np.testing.assert_equal(res, expected)

        a_data = np.random.rand(4, 2, 3)
        b_data = np.random.rand(4, 2, 3)
        a = tensor(a_data, chunk_size=1)
        b = tensor(b_data, chunk_size=2)

        c = column_stack((a, b))
        res = self.executor.execute_tensor(c, concat=True)[0]
        expected = np.column_stack((a_data, b_data))
        np.testing.assert_equal(res, expected)

    def testUnion1dExecution(self):
        """``union1d`` with and without an explicit ``aggregate_size``."""
        rs = np.random.RandomState(0)
        raw1 = rs.random(10)
        raw2 = rs.random(9)

        t1 = tensor(raw1, chunk_size=3)
        t2 = tensor(raw2, chunk_size=4)

        t = union1d(t1, t2, aggregate_size=1)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.union1d(raw1, raw2)
        np.testing.assert_array_equal(res, expected)

        t = union1d(t1, t2)
        res = self.executor.execute_tensor(t, concat=True)[0]
        expected = np.union1d(raw1, raw2)
        np.testing.assert_array_equal(res, expected)
# Example no. 6
# 0
class Test(TestBase):
    def setUp(self):
        """Create the ``ExecutorForTest('numpy')`` instance used by each test."""
        engine = 'numpy'
        self.executor = ExecutorForTest(engine)

    def testAverageExecution(self):
        """Check average() against np.average.

        Covers the unweighted case, a weighted 1-d case, a weighted case
        along ``axis=1``, and expects a TypeError when 1-d weights are given
        for a 2-d tensor without an axis.
        """
        # Unweighted average over single-element chunks.
        plain = average(arange(1, 5, chunk_size=1))
        actual = self.executor.execute_tensor(plain)[0]
        self.assertEqual(actual, np.average(np.arange(1, 5)))

        # Weighted average where both values and weights are chunked tensors.
        weighted = average(arange(1, 11, chunk_size=2),
                           weights=arange(10, 0, -1, chunk_size=2))
        actual = self.executor.execute_tensor(weighted)[0]
        reference = np.average(range(1, 11), weights=range(10, 0, -1))
        self.assertEqual(actual, reference)

        # Weighted average along the second axis of a 3x2 tensor.
        grid = arange(6, chunk_size=2).reshape((3, 2))
        row_weights = tensor([1. / 4, 3. / 4], chunk_size=2)
        per_row = average(grid, axis=1, weights=row_weights)
        actual = self.executor.execute_tensor(per_row, concat=True)[0]
        reference = np.average(np.arange(6).reshape(3, 2),
                               axis=1,
                               weights=(1. / 4, 3. / 4))
        np.testing.assert_equal(actual, reference)

        # Weights whose shape differs from the data require an explicit axis.
        with self.assertRaises(TypeError):
            average(grid, weights=tensor([1. / 4, 3. / 4], chunk_size=2))

    def testCovExecution(self):
        """Check cov() against np.cov and cov(x, y) against cov(stack((x, y)))."""
        observations = np.array([[0, 2], [1, 1], [2, 0]]).T
        estimate = cov(tensor(observations, chunk_size=1))

        actual = self.executor.execute_tensor(estimate, concat=True)[0]
        np.testing.assert_equal(actual, np.cov(observations))

        # Passing two 1-d tensors must give the same matrix as passing them
        # stacked into a single 2-d tensor.
        x = tensor([-2.1, -1, 4.3], chunk_size=1)
        y = tensor([3, 1.1, 0.12], chunk_size=1)

        stacked = stack((x, y), axis=0)
        pairwise = cov(x, y)
        all_equal = tall(pairwise == cov(stacked))
        self.assertTrue(self.executor.execute_tensor(all_equal)[0])

    def testCorrcoefExecution(self):
        """Check corrcoef() of two 1-d tensors against np.corrcoef."""
        xs = [-2.1, -1, 4.3]
        ys = [3, 1.1, 0.12]

        coefficients = corrcoef(tensor(xs, chunk_size=1),
                                tensor(ys, chunk_size=1))

        actual = self.executor.execute_tensor(coefficients, concat=True)[0]
        reference = np.corrcoef(xs, ys)
        np.testing.assert_equal(actual, reference)

    def testPtpExecution(self):
        """Check ptp() against np.ptp along each axis and over all elements."""
        grid = arange(4, chunk_size=1).reshape(2, 2)

        # Reduction along a single axis; the chunked result is concatenated.
        for axis in (0, 1):
            spread = ptp(grid, axis=axis)
            actual = self.executor.execute_tensor(spread, concat=True)[0]
            reference = np.ptp(np.arange(4).reshape(2, 2), axis=axis)
            np.testing.assert_equal(actual, reference)

        # Reduction over the whole tensor.
        overall = ptp(grid)
        actual = self.executor.execute_tensor(overall)[0]
        reference = np.ptp(np.arange(4).reshape(2, 2))
        np.testing.assert_equal(actual, reference)

    def testDigitizeExecution(self):
        """Check digitize() against np.digitize.

        Bins are passed both as a numpy array and as a chunked tensor, the
        ``right`` flag is exercised with both values, and a sparse input is
        compared after converting the result with ``toarray()``.
        """
        def run(t):
            return self.executor.execute_tensor(t, concat=True)[0]

        values = np.array([0.2, 6.4, 3.0, 1.6])
        x = tensor(values, chunk_size=2)
        edges = np.array([0.0, 1.0, 2.5, 4.0, 10.0])

        # Bins as a plain ndarray.
        actual = run(digitize(x, edges))
        np.testing.assert_equal(actual, np.digitize(values, edges))

        # Bins as a chunked tensor.
        chunked_edges = tensor(edges, chunk_size=2)
        actual = run(digitize(x, chunked_edges))
        np.testing.assert_equal(actual, np.digitize(values, edges))

        # Both settings of ``right`` on data that hits bin edges exactly.
        values = np.array([1.2, 10.0, 12.4, 15.5, 20.])
        x = tensor(values, chunk_size=2)
        edges = np.array([0, 5, 10, 15, 20])
        for right in (True, False):
            actual = run(digitize(x, edges, right=right))
            reference = np.digitize(values, edges, right=right)
            np.testing.assert_equal(actual, reference)

        # Sparse input.
        sparse_values = sps.random(10, 1, density=.1) * 12
        x = tensor(sparse_values, chunk_size=2)
        edges = np.array([1.0, 2.0, 2.5, 4.0, 10.0])
        actual = run(digitize(x, edges))
        reference = np.digitize(sparse_values.toarray(), edges, right=False)
        np.testing.assert_equal(actual.toarray(), reference)

    @ignore_warning
    def testHistogramBinEdgesExecution(self):
        """Check histogram_bin_edges() against np.histogram_bin_edges.

        With an explicit ``range`` the plain executor is used.  Inside the
        test context, an integer bin count and every named estimator are
        tried on several inputs; for non-empty inputs ``execute_tensor`` is
        expected to raise TilesError, while ``execute_tensors`` must return
        the same edges as numpy.  Explicit edges are passed both as a list
        and as a chunked tensor.
        """
        rng = np.random.RandomState(0)

        raw = rng.randint(10, size=(20, ))
        a = tensor(raw, chunk_size=3)

        # range provided
        for bounds in [(0, 10), (3, 11), (3, 7)]:
            edges = histogram_bin_edges(a, range=bounds)
            actual = self.executor.execute_tensor(edges)[0]
            reference = np.histogram_bin_edges(raw, range=bounds)
            np.testing.assert_array_equal(actual, reference)

        bin_specs = [
            10, 'stone', 'auto', 'doane', 'fd', 'rice', 'scott',
            'sqrt', 'sturges'
        ]

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw_single = rng.randint(10, size=(1, ))
            single = tensor(raw_single)
            raw_empty = rng.randint(10, size=(0, ))
            no_items = tensor(raw_empty)
            inputs = [(a, raw), (single, raw_single), (no_items, raw_empty),
                      (sort(a), raw)]

            for t, r in inputs:
                for bins in bin_specs:
                    edges = histogram_bin_edges(t, bins=bins)

                    if r.size > 0:
                        with self.assertRaises(TilesError):
                            executor.execute_tensor(edges)

                    actual = executor.execute_tensors([edges])[0]
                    reference = np.histogram_bin_edges(r, bins=bins)
                    np.testing.assert_array_equal(actual, reference)

                # Explicit edges, as a list and as a chunked tensor.
                for bins in [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]:
                    edges = histogram_bin_edges(t, bins=bins)
                    actual = executor.execute_tensors([edges])[0]
                    reference = np.histogram_bin_edges(r, bins=[0, 4, 8])
                    np.testing.assert_array_equal(actual, reference)

            # Default arguments: the tensor's shape must match numpy's too.
            raw = np.arange(5)
            a = tensor(raw, chunk_size=3)
            edges = histogram_bin_edges(a)
            actual = executor.execute_tensors([edges])[0]
            reference = np.histogram_bin_edges(raw)
            self.assertEqual(edges.shape, reference.shape)
            np.testing.assert_array_equal(actual, reference)

    @ignore_warning
    def testHistogramExecution(self):
        """Check the counts returned by histogram() against np.histogram.

        Covers an explicit ``range``, explicit edges combined with weights
        (numpy array or tensor) and both ``density`` values, every named bin
        estimator inside the test context (where ``execute_tensor`` is
        expected to raise TilesError for non-empty inputs), and inputs
        produced by boolean-mask indexing.
        """
        rng = np.random.RandomState(0)

        raw = rng.randint(10, size=(20, ))
        a = tensor(raw, chunk_size=3)
        raw_weights = rng.random(20)
        weights = tensor(raw_weights, chunk_size=4)

        # range provided
        for bounds in [(0, 10), (3, 11), (3, 7)]:
            counts = histogram(a, range=bounds)[0]
            actual = self.executor.execute_tensor(counts)[0]
            reference = np.histogram(raw, range=bounds)[0]
            np.testing.assert_array_equal(actual, reference)

        # Explicit edges with weights given as ndarray and as tensor.
        for wt in (raw_weights, weights):
            for density in (True, False):
                bins = [1, 4, 6, 9]
                counts = histogram(a,
                                   bins=bins,
                                   weights=wt,
                                   density=density)[0]
                actual = self.executor.execute_tensor(counts)[0]
                reference = np.histogram(raw,
                                         bins=bins,
                                         weights=raw_weights,
                                         density=density)[0]
                np.testing.assert_almost_equal(actual, reference)

        bin_specs = [
            10, 'stone', 'auto', 'doane', 'fd', 'rice', 'scott',
            'sqrt', 'sturges'
        ]

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw_single = rng.randint(10, size=(1, ))
            single = tensor(raw_single)
            raw_empty = rng.randint(10, size=(0, ))
            no_items = tensor(raw_empty)
            inputs = [(a, raw), (single, raw_single), (no_items, raw_empty),
                      (sort(a), raw)]

            for t, r in inputs:
                for density in (True, False):
                    for bins in bin_specs:
                        hist = histogram(t, bins=bins, density=density)[0]

                        if r.size > 0:
                            with self.assertRaises(TilesError):
                                executor.execute_tensor(hist)

                        actual = executor.execute_tensors([hist])[0]
                        reference = np.histogram(r, bins=bins,
                                                 density=density)[0]
                        np.testing.assert_array_equal(actual, reference)

                    # Explicit edges, as a list and as a chunked tensor.
                    for bins in [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]:
                        hist = histogram(t, bins=bins, density=density)[0]
                        actual = executor.execute_tensors([hist])[0]
                        reference = np.histogram(r,
                                                 bins=[0, 4, 8],
                                                 density=density)[0]
                        np.testing.assert_array_equal(actual, reference)

            # test unknown shape
            raw_masked = rng.rand(10)
            masked = tensor(raw_masked, chunk_size=3)
            masked = masked[masked < 0.9]
            hist = histogram(masked)
            actual = executor.execute_tensors(hist)[0]
            reference = np.histogram(raw_masked[raw_masked < 0.9])[0]
            np.testing.assert_array_equal(actual, reference)

            raw_tail = np.arange(3, 10)
            tail = arange(10, chunk_size=3)
            tail = tail[tail >= 3]
            hist = histogram(tail)
            actual = executor.execute_tensors(hist)[0]
            reference = np.histogram(raw_tail)[0]
            np.testing.assert_array_equal(actual, reference)

    def testQuantileExecution(self):
        """Check quantile() against np.quantile.

        Four input layouts are exercised (single chunk / multiple chunks,
        1-d / 2-d).  For each layout the comparison is made on the random
        data and on a copy with three positions set to NaN, for a scalar and
        a vector ``q``, every entry of INTERPOLATION_TYPES and both
        ``keepdims`` values; the 2-d layouts additionally iterate over
        ``axis`` in (None, 0, 1).  Single-chunk results are compared exactly,
        multi-chunk results with ``assert_almost_equal``.

        The ``out`` argument and a tensor-valued ``q`` are tested afterwards.

        Compared with the previous version, the NaN tensor of the
        "multi chunks, 1-d" case is now really split into several chunks
        (``chunk_size=3`` rather than 20), so NaN handling across chunks is
        covered for 1-d input as well.
        """
        def check(raw, chunk_size, axis_options, assert_func):
            """Compare quantile() with np.quantile on raw and a NaN copy.

            ``axis_options`` is a list of keyword dicts forwarded to both
            implementations (``{}`` means "do not pass axis at all").
            """
            with_nan = raw.copy()
            # ``.flat`` indexing addresses elements of 1-d and 2-d arrays alike.
            nan_at = np.random.RandomState(0).randint(raw.size, size=3)
            with_nan.flat[nan_at] = np.nan

            pairs = [
                (raw, tensor(raw, chunk_size=chunk_size)),
                (with_nan, tensor(with_nan, chunk_size=chunk_size)),
            ]
            qs = [
                np.random.RandomState(0).rand(),
                np.random.RandomState(0).rand(5)
            ]

            for q in qs:
                for interpolation in INTERPOLATION_TYPES:
                    for keepdims in [True, False]:
                        for axis_kw in axis_options:
                            for data, t in pairs:
                                r = quantile(t,
                                             q,
                                             interpolation=interpolation,
                                             keepdims=keepdims,
                                             **axis_kw)
                                result = self.executor.execute_tensor(
                                    r, concat=True)[0]
                                expected = np.quantile(
                                    data,
                                    q,
                                    interpolation=interpolation,
                                    keepdims=keepdims,
                                    **axis_kw)
                                assert_func(result, expected)

        no_axis = [{}]
        all_axes = [{'axis': axis} for axis in [None, 0, 1]]
        exact = np.testing.assert_array_equal
        close = np.testing.assert_almost_equal

        # test 1 chunk, 1-d
        check(np.random.rand(20), 20, no_axis, exact)

        # test 1 chunk, 2-d
        check(np.random.rand(20, 10), 20, all_axes, exact)

        # test multi chunks, 1-d
        check(np.random.rand(20), 3, no_axis, close)

        # test multi chunk, 2-d
        check(np.random.rand(20, 10), (12, 6), all_axes, close)

        # test out, 1 chunk
        raw = np.random.rand(20)
        q = np.random.rand(11)
        a = tensor(raw, chunk_size=20)
        out = empty((5, 11))
        quantile(a, q, out=out)

        result = self.executor.execute_tensor(out, concat=True)[0]
        expected = np.quantile(raw, q, out=np.empty((5, 11)))
        np.testing.assert_array_equal(result, expected)

        # test out, multi chunks
        raw = np.random.rand(20)
        q = np.random.rand(11)
        a = tensor(raw, chunk_size=3)
        out = empty((5, 11))
        quantile(a, q, out=out)

        result = self.executor.execute_tensor(out, concat=True)[0]
        expected = np.quantile(raw, q, out=np.empty((5, 11)))
        np.testing.assert_almost_equal(result, expected)

        # test q which is a tensor (reuses ``raw`` / ``a`` from the block above)
        q_raw = np.random.RandomState(0).rand(5)
        q = tensor(q_raw, chunk_size=3)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            r = quantile(a, q, axis=None)

            result = executor.execute_tensors([r])[0]
            expected = np.quantile(raw, q_raw, axis=None)

            np.testing.assert_almost_equal(result, expected)

            # 1.1 lies outside [0, 1]; a ValueError is expected from
            # somewhere within this block.
            with self.assertRaises(ValueError):
                q[0] = 1.1
                r = quantile(a, q, axis=None)
                _ = executor.execute_tensors(r)[0]

    def testPercentileExecution(self):
        """Check percentile() against np.percentile for array and tensor q."""
        raw = np.random.rand(20, 10)
        percents = np.random.RandomState(0).randint(100, size=11)
        a = tensor(raw, chunk_size=7)

        # q given as a numpy array.
        from_array = percentile(a, percents)
        actual = self.executor.execute_tensor(from_array, concat=True)[0]
        reference = np.percentile(raw, percents)
        np.testing.assert_almost_equal(actual, reference)

        # q given as a tensor; executed inside the test context.
        percents_tensor = tensor(percents)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            from_tensor = percentile(a, percents_tensor)
            actual = executor.execute_tensors([from_tensor])[0]

            np.testing.assert_almost_equal(actual, reference)

    def testMedianExecution(self):
        """Check median() against np.median, overall and along ``axis=1``."""
        raw = np.random.rand(20, 10)
        a = tensor(raw, chunk_size=7)

        # ``{}`` leaves axis unspecified for both implementations.
        for options in ({}, {'axis': 1}):
            mid = median(a, **options)

            actual = self.executor.execute_tensor(mid, concat=True)[0]
            reference = np.median(raw, **options)

            np.testing.assert_array_equal(actual, reference)
Ejemplo n.º 7
0
class Test(unittest.TestCase):
    def setUp(self) -> None:
        """Create the ``ExecutorForTest('numpy')`` instance used by each test."""
        engine = 'numpy'
        self._executor = ExecutorForTest(engine)

    @unittest.skipIf(distance.pdist is None, 'scipy not installed')
    def testPdistExecution(self):
        """Check distance.pdist against scipy's pdist.

        Runs the default metric, 'hamming' and a callable metric on a
        single-chunk and a multi-chunk tensor (the latter with several
        ``aggregate_size`` values, asserting the tiled chunk count where the
        original test did), then metrics that take an extra tensor
        parameter (``w``, ``V``, ``VI``).
        """
        from scipy.spatial.distance import pdist as sp_pdist

        def run(t):
            return self._executor.execute_tensor(t, concat=True)[0]

        raw = np.random.rand(100, 10)
        # Callable metric: plain Euclidean distance between two rows.
        f = lambda u, v: np.sqrt(((u - v)**2).sum())

        # test 1 chunk
        x = tensor(raw, chunk_size=100)
        for metric_kw in ({}, {'metric': 'hamming'}, {'metric': f}):
            dist = distance.pdist(x, **metric_kw)
            result = run(dist)
            expected = sp_pdist(raw, **metric_kw)
            np.testing.assert_array_equal(result, expected)

        # test more than 1 chunk
        x = tensor(raw, chunk_size=12)
        multi_chunk_cases = [
            # (metric kwargs, mars-only kwargs, expected chunk count or None
            #  when the tiled chunk count is not checked)
            ({}, {}, 1),
            ({}, {'aggregate_size': 3}, 3),
            ({'metric': 'hamming'}, {'aggregate_size': 2}, 2),
            ({'metric': f}, {'aggregate_size': 2}, None),
        ]
        for metric_kw, mars_kw, n_chunks in multi_chunk_cases:
            dist = distance.pdist(x, **dict(metric_kw, **mars_kw))
            if n_chunks is not None:
                tiled = dist.tiles()
                self.assertEqual(len(tiled.chunks), n_chunks)
            result = run(dist)
            expected = sp_pdist(raw, **metric_kw)
            np.testing.assert_array_equal(result, expected)

        extra_param_cases = [
            # (metric, parameter name, parameter shape, chunk size, fixed kwargs)
            ('wminkowski', 'w', (10, ), 7, {'p': 3}),  # test w
            ('seuclidean', 'V', (10, ), 7, {}),  # test V
            ('mahalanobis', 'VI', (10, 10), 8, {}),  # test VI
        ]
        for x in [tensor(raw), tensor(raw, chunk_size=12)]:
            for metric, name, shape, chunk_size, fixed in extra_param_cases:
                param_raw = np.random.rand(*shape)
                param = tensor(param_raw, chunk_size=chunk_size)

                dist = distance.pdist(x, metric=metric,
                                      **dict(fixed, **{name: param}))
                result = run(dist)
                expected = sp_pdist(raw, metric=metric,
                                    **dict(fixed, **{name: param_raw}))
                np.testing.assert_array_equal(result, expected)

    @unittest.skipIf(distance.cdist is None, 'scipy not installed')
    def testCdistExecution(self):
        """Check distance.cdist against scipy's cdist.

        Runs the default metric, 'hamming' and a callable metric with both
        inputs as a single chunk and as multiple chunks, then metrics that
        take an extra tensor parameter (``w``, ``V``, ``VI``).
        """
        from scipy.spatial.distance import cdist as sp_cdist

        def run(t):
            return self._executor.execute_tensor(t, concat=True)[0]

        raw_a = np.random.rand(100, 10)
        raw_b = np.random.rand(89, 10)
        # Callable metric: plain Euclidean distance between two rows.
        f = lambda u, v: np.sqrt(((u - v)**2).sum())

        # First one chunk per input, then more than one chunk per input.
        for size_a, size_b in ((100, 100), (12, 13)):
            xa = tensor(raw_a, chunk_size=size_a)
            xb = tensor(raw_b, chunk_size=size_b)

            for metric_kw in ({}, {'metric': 'hamming'}, {'metric': f}):
                dist = distance.cdist(xa, xb, **metric_kw)
                result = run(dist)
                expected = sp_cdist(raw_a, raw_b, **metric_kw)
                np.testing.assert_array_equal(result, expected)

        extra_param_cases = [
            # (metric, parameter name, parameter shape, chunk size, fixed kwargs)
            ('wminkowski', 'w', (10, ), 7, {'p': 3}),  # test w
            ('seuclidean', 'V', (10, ), 7, {}),  # test V
            ('mahalanobis', 'VI', (10, 10), 8, {}),  # test VI
        ]
        operand_pairs = [
            (tensor(raw_a), tensor(raw_b)),
            (tensor(raw_a, chunk_size=12), tensor(raw_b, chunk_size=13)),
        ]
        for xa, xb in operand_pairs:
            for metric, name, shape, chunk_size, fixed in extra_param_cases:
                param_raw = np.random.rand(*shape)
                param = tensor(param_raw, chunk_size=chunk_size)

                dist = distance.cdist(xa, xb, metric=metric,
                                      **dict(fixed, **{name: param}))
                result = run(dist)
                expected = sp_cdist(raw_a, raw_b, metric=metric,
                                    **dict(fixed, **{name: param_raw}))
                np.testing.assert_array_equal(result, expected)

    @unittest.skipIf(distance.cdist is None, 'scipy not installed')
    def testSqureFormExecution(self):
        """Check distance.squareform in both directions against scipy.

        Converts a condensed vector to a square matrix and back, with single
        and multiple chunks, then verifies that a non-symmetric matrix raises
        ValueError when ``checks=True`` and executes when ``checks=False``.
        """
        from scipy.spatial.distance import pdist as sp_pdist, \
            squareform as sp_squareform

        def run(t):
            return self._executor.execute_tensor(t, concat=True)[0]

        points = np.random.rand(80, 10)
        condensed = sp_pdist(points)
        square = sp_squareform(condensed)

        # tomatrix, test 1 chunk
        vec = tensor(condensed, chunk_size=condensed.shape[0])
        mat = distance.squareform(vec, chunk_size=100)
        np.testing.assert_array_equal(run(mat), square)

        # tomatrix, test more than 1 chunk
        vec = tensor(condensed, chunk_size=33)
        self.assertGreater(len(vec.tiles().chunks), 1)
        mat = distance.squareform(vec, chunk_size=34)
        np.testing.assert_array_equal(run(mat), square)

        # tovec, test 1 chunk
        mat = tensor(square)
        vec = distance.squareform(mat, chunk_size=condensed.shape[0])
        self.assertEqual(len(mat.tiles().chunks), 1)
        self.assertEqual(len(vec.tiles().chunks), 1)
        np.testing.assert_array_equal(run(vec), condensed)

        # tovec, test more than 1 chunk
        mat = tensor(square, chunk_size=31)
        vec = distance.squareform(mat, chunk_size=40)
        self.assertGreater(len(vec.tiles().chunks), 1)
        np.testing.assert_array_equal(run(vec), condensed)

        # test checks
        # generate non-symmetric matrix
        asymmetric = np.random.RandomState(0).rand(10, 10)

        check_cases = [
            # (tensor kwargs, chunk_size used with checks=True,
            #  whether the result must tile into more than one chunk)
            ({}, 100, False),  # 1 chunk
            ({'chunk_size': 6}, 8, True),  # more than 1 chunk
        ]
        for tensor_kw, checked_chunk_size, expect_many in check_cases:
            mat = tensor(asymmetric, **tensor_kw)
            vec = distance.squareform(mat, checks=True,
                                      chunk_size=checked_chunk_size)
            if expect_many:
                self.assertGreater(len(vec.tiles().chunks), 1)
            with self.assertRaises(ValueError):
                _ = run(vec)
            # force checks=False
            vec = distance.squareform(mat, checks=False, chunk_size=100)
            _ = run(vec)
Ejemplo n.º 8
0
class TestUnary(TestBase):
    def setUp(self):
        """Run the base-class setUp, then create a default ExecutorForTest."""
        super().setUp()
        executor = ExecutorForTest()
        self.executor = executor

    def testAbs(self):
        """Check ``df.abs()`` and builtin ``abs(df)`` against pandas."""
        raw = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        df = from_pandas(raw, chunk_size=5)

        # Method form.
        actual = self.executor.execute_dataframe(df.abs(), concat=True)[0]
        reference = raw.abs()
        pd.testing.assert_frame_equal(reference, actual)

        # Builtin form must give the same frame.
        actual = self.executor.execute_dataframe(abs(df), concat=True)[0]
        pd.testing.assert_frame_equal(reference, actual)

    def testNot(self):
        """Check that ``~df`` on a boolean DataFrame matches pandas."""
        flags = np.random.uniform(low=-1, high=1, size=(10, 10)) > 0
        raw = pd.DataFrame(flags)
        df = from_pandas(raw, chunk_size=5)

        actual = self.executor.execute_dataframe(~df, concat=True)[0]
        reference = ~raw
        pd.testing.assert_frame_equal(reference, actual)

    def testNegative(self):
        """Check that unary minus on an integer DataFrame matches pandas."""
        numbers = np.random.randint(low=0, high=100, size=(10, 10))
        raw = pd.DataFrame(numbers)
        df = from_pandas(raw, chunk_size=5)

        actual = self.executor.execute_dataframe(-df, concat=True)[0]
        reference = -raw
        pd.testing.assert_frame_equal(reference, actual)

    def testUfunc(self):
        """Check unary ufuncs on a DataFrame and a Series against numpy.

        Every (numpy function, mars function) pair is applied to the chunked
        object through the mars function and through the numpy function
        itself; both results must equal the numpy function applied to the
        raw pandas object.
        """
        descending = pd.RangeIndex(9, -1, -1)

        df_raw = pd.DataFrame(np.random.uniform(size=(10, 10)),
                              index=descending)
        df = from_pandas(df_raw, chunk_size=5)

        series_raw = pd.Series(np.random.uniform(size=10),
                               index=pd.RangeIndex(9, -1, -1))
        series = from_pandas_series(series_raw, chunk_size=5)

        function_pairs = [
            (np.abs, mt.abs),
            (np.log, mt.log),
            (np.log2, mt.log2),
            (np.log10, mt.log10),
            (np.sin, mt.sin),
            (np.cos, mt.cos),
            (np.tan, mt.tan),
            (np.sinh, mt.sinh),
            (np.cosh, mt.cosh),
            (np.tanh, mt.tanh),
            (np.arcsin, mt.arcsin),
            (np.arccos, mt.arccos),
            (np.arctan, mt.arctan),
            (np.arcsinh, mt.arcsinh),
            (np.arccosh, mt.arccosh),
            (np.arctanh, mt.arctanh),
            (np.radians, mt.radians),
            (np.degrees, mt.degrees),
            (np.ceil, mt.ceil),
            (np.floor, mt.floor),
            (partial(np.around, decimals=2), partial(mt.around, decimals=2)),
            (np.exp, mt.exp),
            (np.exp2, mt.exp2),
            (np.expm1, mt.expm1),
            (np.sqrt, mt.sqrt),
            (np.isnan, mt.isnan),
            (np.isfinite, mt.isfinite),
            (np.isinf, mt.isinf),
            (np.negative, mt.negative),
        ]

        for raw, data in [(df_raw, df), (series_raw, series)]:
            # Pick the pandas comparison that fits the container type.
            if isinstance(raw, pd.DataFrame):
                assert_same = pd.testing.assert_frame_equal
            else:
                assert_same = pd.testing.assert_series_equal

            for npf, mtf in function_pairs:
                expected = npf(raw)

                # The mars function first, then the numpy ufunc applied
                # directly to the mars object.
                for func in (mtf, npf):
                    r = func(data)
                    result = self.executor.execute_tensor(r, concat=True)[0]
                    assert_same(result, expected)

    def testDateTimeBin(self):
        """Check a combined datetime range comparison on a column against pandas."""
        rng = np.random.RandomState(0)
        # Columns are drawn in the order a, b, c from the seeded generator.
        col_a = rng.randint(1000, size=10)
        col_b = rng.rand(10)
        col_c = [pd.Timestamp(rng.randint(1604000000, 1604481373))
                 for _ in range(10)]
        df_raw = pd.DataFrame({'a': col_a, 'b': col_b, 'c': col_c},
                              index=pd.RangeIndex(9, -1, -1))
        df = from_pandas(df_raw, chunk_size=5)

        after_start = df['c'] > to_datetime('2000-01-01')
        before_end = df['c'] < to_datetime('2021-01-01')
        in_range = after_start & before_end

        actual = self.executor.execute_dataframe(in_range, concat=True)[0]
        reference = (df_raw['c'] > pd.to_datetime('2000-01-01')) & \
                    (df_raw['c'] < pd.to_datetime('2021-01-01'))
        pd.testing.assert_series_equal(actual, reference)

    def testSeriesAndTensor(self):
        """Check ``tensor | series`` on boolean data against ``ndarray | Series``."""
        rng = np.random.RandomState(0)
        series_flags = pd.Series(rng.rand(10)) < 0.5
        array_flags = rng.rand(10) < 0.5

        series = from_pandas_series(series_flags, chunk_size=5)
        t = mt.tensor(array_flags, chunk_size=5)

        either = t | series
        actual = self.executor.execute_dataframe(either, concat=True)[0]
        reference = array_flags | series_flags
        pd.testing.assert_series_equal(actual, reference)
Ejemplo n.º 9
0
class Test(unittest.TestCase):
    """Tests for ``manhattan_distances`` on chunked tensors."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testManhattanDistances(self):
        """Check argument validation and the declared output shapes."""
        x = mt.random.randint(10, size=(10, 3), density=0.4)
        y = mt.random.randint(10, size=(11, 3), density=0.5)

        # Inputs created with ``density=`` are rejected when
        # ``sum_over_features`` is False.
        with self.assertRaises(TypeError):
            manhattan_distances(x, y, sum_over_features=False)

        x = x.todense()
        y = y.todense()

        d = manhattan_distances(x, y, sum_over_features=True)
        self.assertEqual(d.shape, (10, 11))
        # Without summing over features there is one row per (x, y) pair.
        d = manhattan_distances(x, y, sum_over_features=False)
        self.assertEqual(d.shape, (110, 3))

    def testManhattanDistancesExecution(self):
        """Compare executed results with scikit-learn for dense/sparse input.

        Each raw input is wrapped twice: once with a chunk size larger than
        the data (single chunk) and once with a smaller one (several chunks).
        """
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x1 = mt.tensor(raw_x, chunk_size=30)
        y1 = mt.tensor(raw_y, chunk_size=30)

        x2 = mt.tensor(raw_x, chunk_size=11)
        y2 = mt.tensor(raw_y, chunk_size=12)

        raw_sparse_x = sps.random(20, 5, density=0.4, format='csr',
                                  random_state=0)
        raw_sparse_y = sps.random(21, 5, density=0.3, format='csr',
                                  random_state=0)

        x3 = mt.tensor(raw_sparse_x, chunk_size=30)
        y3 = mt.tensor(raw_sparse_y, chunk_size=30)

        x4 = mt.tensor(raw_sparse_x, chunk_size=11)
        y4 = mt.tensor(raw_sparse_y, chunk_size=12)

        cases = [(x1, y1, False), (x2, y2, False),
                 (x3, y3, True), (x4, y4, True)]
        for x, y, is_sparse in cases:
            if is_sparse:
                rx, ry = raw_sparse_x, raw_sparse_y
                # sparse inputs are only exercised with summation enabled
                sum_options = [True]
            else:
                rx, ry = raw_x, raw_y
                sum_options = [True, False]

            for sum_over_features in sum_options:
                # two-operand form
                d = manhattan_distances(
                    x, y, sum_over_features=sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                # scikit-learn accepts ``sum_over_features`` by keyword
                # only, so it must not be passed positionally.
                expected = sk_manhattan_distances(
                    rx, ry, sum_over_features=sum_over_features)

                np.testing.assert_almost_equal(result, expected)

                # single-operand form (distances of x against itself)
                d = manhattan_distances(x, sum_over_features=sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(
                    rx, sum_over_features=sum_over_features)

                np.testing.assert_almost_equal(result, expected)
Ejemplo n.º 10
0
class Test(unittest.TestCase):
    """Tests for ``normalize`` on chunked dense and sparse tensors."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    @staticmethod
    def _to_dense(value):
        """Return *value* densified if it is a SciPy sparse container.

        ``toarray()`` is used instead of the ``.A`` shorthand: it is the
        documented conversion, is available on both sparse matrices and
        sparse arrays, and matches how the rest of the file densifies.
        """
        return value.toarray() if sps.issparse(value) else value

    def testNormalizeOp(self):
        """Invalid norm, invalid axis and 3-d input each raise ValueError."""
        with self.assertRaises(ValueError):
            normalize(mt.random.random(10, 3), norm='unknown')

        with self.assertRaises(ValueError):
            normalize(mt.random.random(10, 3), axis=-1)

        with self.assertRaises(ValueError):
            normalize(mt.random.rand(10, 3, 3))

    def testNormalizeExecution(self):
        """Compare executed results with scikit-learn's ``normalize``."""
        raw_dense = np.random.rand(10, 10)
        raw_sparse = sps.random(10, 10, density=0.4, format='csr')

        for chunk_size in [10, 6, (10, 6), (6, 10)]:
            for raw, x in [
                (raw_dense, mt.tensor(raw_dense, chunk_size=chunk_size)),
                (raw_sparse, mt.tensor(raw_sparse, chunk_size=chunk_size))
            ]:
                for norm in ['l1', 'l2', 'max']:
                    for axis in (0, 1):
                        for use_sklearn in [True, False]:
                            n = normalize(x,
                                          norm=norm,
                                          axis=axis,
                                          return_norm=False)
                            # toggle the operand's flag so both settings
                            # are executed
                            n.op._use_sklearn = use_sklearn

                            result = self.executor.execute_tensor(
                                n, concat=True)[0]
                            expected = self._to_dense(
                                sk_normalize(raw,
                                             norm=norm,
                                             axis=axis,
                                             return_norm=False))

                            np.testing.assert_almost_equal(
                                np.asarray(result), expected)

        raw_dense = np.random.rand(10, 10)
        raw_sparse = sps.random(10, 10, density=0.4, format='csr')

        # test copy and return_normalize
        for axis in (0, 1):
            for chunk_size in (10, 6, (6, 10)):
                for raw in (raw_dense, raw_sparse):
                    x = mt.tensor(raw, chunk_size=chunk_size)
                    n = normalize(x, axis=axis, copy=False, return_norm=True)

                    results = self.executor.execute_tensors(n)
                    # scikit-learn gets its own copy because ``copy=False``
                    # lets it work in place
                    raw_copy = raw.copy()
                    try:
                        expects = sk_normalize(raw_copy,
                                               axis=axis,
                                               copy=False,
                                               return_norm=True)
                    except NotImplementedError:
                        # no reference value for this combination; skip it
                        continue

                    np.testing.assert_almost_equal(
                        np.asarray(results[0]), self._to_dense(expects[0]))
                    np.testing.assert_almost_equal(results[1], expects[1])
Ejemplo n.º 11
0
class Test(TestBase):
    """Execution tests for storing tensors to TileDB, HDF5 and Zarr."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        """Store tensors with ``totiledb`` and read them back with tiledb.

        Four scenarios, each in a fresh temporary directory that is removed
        afterwards: a chunked dense 3-d array, an ``arange`` tensor, a
        chunked 2-d sparse matrix, and a Fortran-ordered dense array.
        """
        ctx = tiledb.Ctx()

        tempdir = tempfile.mkdtemp()
        try:
            # store TileDB dense array
            expected = np.random.rand(8, 4, 3)
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store tensor with 1 chunk to TileDB dense array
            a = arange(12)
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(np.arange(12), arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store 2-d TileDB sparse array
            expected = sps.random(8, 7, density=0.1)
            a = tensor(expected, chunk_size=(3, 5))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.SparseArray(uri=tempdir, ctx=ctx) as arr:
                # rebuild a COO matrix from the stored coordinates and
                # the values of the first attribute
                data = arr[:, :]
                coords = data['coords']
                value = data[arr.attr(0).name]
                ij = tuple(coords[arr.domain.dim(k).name]
                           for k in range(arr.ndim))
                result = sps.coo_matrix((value, ij), shape=arr.shape)

                np.testing.assert_allclose(expected.toarray(),
                                           result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store Fortran-ordered dense array; the schema's cell order
            # is checked as well
            expected = np.asfortranarray(np.random.rand(8, 4, 3))
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
                self.assertEqual(arr.schema.cell_order, 'col-major')
        finally:
            shutil.rmtree(tempdir)

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testStoreHDF5Execution(self):
        """Store tensors with ``tohdf5`` given a filename, file or dataset.

        Also checks that an unsupported target raises ``TypeError`` and that
        a filename or file handle without group/dataset names raises
        ``ValueError``.
        """
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        # t1 has a chunk size covering the whole array; t2 is split further
        t1 = tensor(raw, chunk_size=20)
        t2 = tensor(raw, chunk_size=9)

        with self.assertRaises(TypeError):
            tohdf5(object(), t2)

        # alias so the nested class below can reach this test instance
        this = self

        class MockSession:
            # minimal stand-in exposing only the ``executor`` attribute
            def __init__(self):
                self.executor = this.executor

        ctx = LocalContext(MockSession())
        executor = ExecutorForTest('numpy', storage=ctx)
        with ctx:
            with tempfile.TemporaryDirectory() as d:
                filename = os.path.join(
                    d, 'test_store_{}.hdf5'.format(int(time.time())))

                # test 1 chunk
                r = tohdf5(filename,
                           t1,
                           group=group_name,
                           dataset=dataset_name)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # test filename
                r = tohdf5(filename,
                           t2,
                           group=group_name,
                           dataset=dataset_name)

                executor.execute_tensor(r)

                # the first chunk's second input must be an operand named
                # 'SuccessorsExclusive' that itself has no inputs
                rt = get_tiled(r)
                self.assertEqual(
                    type(rt.chunks[0].inputs[1].op).__name__,
                    'SuccessorsExclusive')
                self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # a filename without group/dataset names is rejected
                with self.assertRaises(ValueError):
                    tohdf5(filename, t2)

                with h5py.File(filename, 'r') as f:
                    # test file
                    r = tohdf5(f, t2, group=group_name, dataset=dataset_name)

                # note: executed after the file handle above was closed
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

                # a file handle without group/dataset names is rejected
                with self.assertRaises(ValueError):
                    with h5py.File(filename, 'r') as f:
                        tohdf5(f, t2)

                with h5py.File(filename, 'r') as f:
                    # test dataset
                    ds = f['{}/{}'.format(group_name, dataset_name)]
                    # the dataset handle is passed on its own, without
                    # group/dataset names
                    r = tohdf5(ds, t2)

                # note: executed after the file handle above was closed
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f['{}/{}'.format(
                        group_name, dataset_name)])
                    np.testing.assert_array_equal(result, raw)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testStoreZarrExecution(self):
        """Store tensors with ``tozarr`` given a path or an opened array."""
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t = tensor(raw, chunk_size=6)

        with self.assertRaises(TypeError):
            tozarr(object(), t)

        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, 'test_store_{}.zarr'.format(int(time.time())))
            path = '{}/{}/{}'.format(filename, group_name, dataset_name)

            # store by filename plus group/dataset, with an explicit
            # compressor that is checked on the stored array
            r = tozarr(filename,
                       t,
                       group=group_name,
                       dataset=dataset_name,
                       compressor=Zstd(level=3))
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw)
            self.assertEqual(arr.compressor, Zstd(level=3))

            # store by full path to the dataset
            r = tozarr(path, t + 2)
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw + 2)

            # store into an already opened zarr array
            filters = [Delta(dtype='i4')]
            compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
            arr = zarr.open(path, compressor=compressor, filters=filters)

            r = tozarr(arr, t + 1)
            self.executor.execute_tensor(r)
            result = zarr.open_array(path)
            np.testing.assert_array_equal(result, raw + 1)
Ejemplo n.º 12
0
class Test(TestBase):
    def setUp(self):
        """Run the base-class setup, then create the executor used by the tests."""
        super().setUp()
        self.executor = ExecutorForTest()

    def testSetIndex(self):
        """``set_index('y')`` matches pandas with and without ``drop``."""
        pdf = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        mdf = md.DataFrame(pdf, chunk_size=2)

        # drop=True first, then drop=False
        for drop in (True, False):
            expected = pdf.set_index('y', drop=drop)
            indexed = mdf.set_index('y', drop=drop)
            actual = self.executor.execute_dataframe(indexed, concat=True)[0]
            pd.testing.assert_frame_equal(expected, actual)

    def testILocGetItem(self):
        """Compare ``iloc`` reads on chunked DataFrame/Series with pandas."""
        pdf = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        mdf = md.DataFrame(pdf, chunk_size=2)

        def fetch(obj, **kwargs):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(
                obj, concat=True, **kwargs)[0]

        same_frame = pd.testing.assert_frame_equal
        same_series = pd.testing.assert_series_equal

        # ---- DataFrame ----
        # plain index
        same_series(pdf.iloc[1],
                    fetch(mdf.iloc[1], check_series_name=False))

        # plain index on axis 1
        same_series(pdf.iloc[:2, 1], fetch(mdf.iloc[:2, 1]))

        # slice index
        same_frame(pdf.iloc[:, 2:4], fetch(mdf.iloc[:, 2:4]))

        # plain fancy index
        same_frame(pdf.iloc[[0], [0, 1, 2]], fetch(mdf.iloc[[0], [0, 1, 2]]))

        # plain fancy index with shuffled order
        same_frame(pdf.iloc[[0], [1, 2, 0]], fetch(mdf.iloc[[0], [1, 2, 0]]))

        # fancy index
        same_frame(pdf.iloc[[1, 2], [0, 1, 2]],
                   fetch(mdf.iloc[[1, 2], [0, 1, 2]]))

        # fancy index with shuffled order
        same_frame(pdf.iloc[[2, 1], [1, 2, 0]],
                   fetch(mdf.iloc[[2, 1], [1, 2, 0]]))

        # one fancy index
        same_frame(pdf.iloc[[2, 1]], fetch(mdf.iloc[[2, 1]]))

        # scalar lookup by (row, column)
        self.assertEqual(pdf.iloc[1, 2], fetch(mdf.iloc[1, 2]))

        # bool index array
        same_frame(pdf.iloc[[True, False, True], [2, 1]],
                   fetch(mdf.iloc[[True, False, True], [2, 1]]))

        # bool index array on axis 1
        same_frame(pdf.iloc[[2, 1], [True, False, True]],
                   fetch(mdf.iloc[[2, 1], [True, False, True]]))

        # bool index given as a chunked series
        same_frame(
            pdf.iloc[[True, False, True], [2, 1]],
            fetch(mdf.iloc[md.Series([True, False, True], chunk_size=1),
                           [2, 1]]))

        # ---- Series ----
        data = pd.Series(np.arange(10))

        same_series(fetch(md.Series(data, chunk_size=3).iloc[:3]),
                    data.iloc[:3])

        self.assertEqual(fetch(md.Series(data, chunk_size=3).iloc[4]),
                         data.iloc[4])

        same_series(fetch(md.Series(data, chunk_size=3).iloc[[2, 3, 4, 9]]),
                    data.iloc[[2, 3, 4, 9]])

        same_series(fetch(md.Series(data, chunk_size=3).iloc[[4, 3, 9, 2]]),
                    data.iloc[[4, 3, 9, 2]])

        same_series(fetch(md.Series(data).iloc[5:]), data.iloc[5:])

        # bool index array
        selection = np.random.RandomState(0).randint(2, size=10, dtype=bool)
        same_series(fetch(md.Series(data).iloc[selection]),
                    data.iloc[selection])

        # bool index given as a chunked series
        same_series(
            fetch(md.Series(data).iloc[md.Series(selection, chunk_size=4)]),
            data.iloc[selection])

    def testILocSetItem(self):
        """Apply the same ``iloc`` assignments to pandas and chunked objects.

        After every assignment the executed chunked object must equal the
        pandas object that received the same assignment.
        """
        df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]],
                           index=['a1', 'a2', 'a3'],
                           columns=['x', 'y', 'z'])
        df2 = md.DataFrame(df1, chunk_size=2)
        # ``expected`` is df1 itself (no copy), so df1 is mutated below
        expected = df1

        def fetch(obj):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(obj, concat=True)[0]

        def check_frame():
            pd.testing.assert_frame_equal(expected, fetch(df2))

        # plain index
        expected.iloc[1] = 100
        df2.iloc[1] = 100
        check_frame()

        # slice index
        expected.iloc[:, 2:4] = 1111
        df2.iloc[:, 2:4] = 1111
        check_frame()

        # plain fancy index
        expected.iloc[[0], [0, 1, 2]] = 2222
        df2.iloc[[0], [0, 1, 2]] = 2222
        check_frame()

        # fancy index
        expected.iloc[[1, 2], [0, 1, 2]] = 3333
        df2.iloc[[1, 2], [0, 1, 2]] = 3333
        check_frame()

        # single cell
        expected.iloc[1, 2] = 4444
        df2.iloc[1, 2] = 4444
        check_frame()

        # test Series
        data = pd.Series(np.arange(10))
        series = md.Series(data, chunk_size=3)

        def check_series():
            pd.testing.assert_series_equal(fetch(series), data)

        series.iloc[:3] = 1
        data.iloc[:3] = 1
        check_series()

        series.iloc[4] = 2
        data.iloc[4] = 2
        check_series()

        series.iloc[[2, 3, 4, 9]] = 3
        data.iloc[[2, 3, 4, 9]] = 3
        check_series()

        series.iloc[5:] = 4
        data.iloc[5:] = 4
        check_series()

    def testLocGetItem(self):
        """Compare label-based ``loc`` reads on chunked frames with pandas."""
        rs = np.random.RandomState(0)
        # index and columns are labels
        raw1 = pd.DataFrame(rs.randint(10, size=(5, 4)),
                            index=['a1', 'a2', 'a3', 'a4', 'a5'],
                            columns=['a', 'b', 'c', 'd'])
        # columns are labels
        raw2 = raw1.copy()
        raw2.reset_index(inplace=True, drop=True)
        # columns are non unique and monotonic
        raw3 = raw1.copy()
        raw3.columns = ['a', 'b', 'b', 'd']
        # columns are non unique and non monotonic
        raw4 = raw1.copy()
        raw4.columns = ['b', 'a', 'b', 'd']
        # index that is timestamp
        raw5 = raw1.copy()
        raw5.index = pd.date_range('2020-1-1', periods=5)

        df1, df2, df3, df4, df5 = [
            md.DataFrame(raw, chunk_size=2)
            for raw in (raw1, raw2, raw3, raw4, raw5)
        ]

        def fetch(obj, **kwargs):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(
                obj, concat=True, **kwargs)[0]

        same_frame = pd.testing.assert_frame_equal
        same_series = pd.testing.assert_series_equal

        # scalar lookups go through ``execute_tensor``
        scalar = self.executor.execute_tensor(df2.loc[3, 'b'],
                                              concat=True)[0]
        self.assertEqual(scalar, raw2.loc[3, 'b'])

        scalar = self.executor.execute_tensor(df1.loc['a3', 'b'],
                                              concat=True,
                                              check_shape=False)[0]
        self.assertEqual(scalar, raw1.loc['a3', 'b'])

        # slices on both axes
        same_frame(fetch(df2.loc[1:4, 'b':'d']), raw2.loc[1:4, 'b':'d'])
        same_frame(fetch(df2.loc[:4, 'b':]), raw2.loc[:4, 'b':])

        # slice on axis index whose index_value does not have value
        same_frame(fetch(df1.loc['a2':'a4', 'b':]), raw1.loc['a2':'a4', 'b':])

        # single column label
        same_series(fetch(df2.loc[:, 'b']), raw2.loc[:, 'b'])

        # 'b' is non-unique
        same_frame(fetch(df3.loc[:, 'b']), raw3.loc[:, 'b'])

        # 'b' is non-unique, and non-monotonic
        same_frame(fetch(df4.loc[:, 'b']), raw4.loc[:, 'b'])

        # label on axis 0
        same_series(fetch(df1.loc['a2', :]), raw1.loc['a2', :])

        # label-based fancy index
        same_frame(fetch(df2.loc[[3, 0, 1], ['c', 'a', 'd']]),
                   raw2.loc[[3, 0, 1], ['c', 'a', 'd']])

        # label-based fancy index, asc sorted
        same_frame(fetch(df2.loc[[0, 1, 3], ['a', 'c', 'd']]),
                   raw2.loc[[0, 1, 3], ['a', 'c', 'd']])

        # label-based fancy index in which non-unique exists
        selection = rs.randint(2, size=(5, ), dtype=bool)
        same_frame(fetch(df3.loc[selection, ['b', 'a', 'd']]),
                   raw3.loc[selection, ['b', 'a', 'd']])

        # same selection, given as a chunked series
        same_frame(fetch(df3.loc[md.Series(selection), ['b', 'a', 'd']]),
                   raw3.loc[selection, ['b', 'a', 'd']])

        # label-based fancy index on index
        # whose index_value does not have value
        same_frame(fetch(df1.loc[['a3', 'a1'], ['b', 'a', 'd']]),
                   raw1.loc[['a3', 'a1'], ['b', 'a', 'd']])

        # get timestamp by str
        same_series(fetch(df5.loc['20200101'], check_series_name=False),
                    raw5.loc['20200101'])

        # get timestamp by str, return scalar
        self.assertEqual(fetch(df5.loc['2020-1-1', 'c']),
                         raw5.loc['2020-1-1', 'c'])

    def testDataFrameGetitem(self):
        """Compare ``df[...]`` column, list and slice selection with pandas."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)
        # second frame with a daily DatetimeIndex
        data2 = data.copy()
        data2.index = pd.date_range('2020-1-1', periods=10)
        mdf = md.DataFrame(data2, chunk_size=3)

        def fetch(obj):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(obj, concat=True)[0]

        same_frame = pd.testing.assert_frame_equal
        same_series = pd.testing.assert_series_equal

        # single column label -> series
        same_series(fetch(df['c2']), data['c2'])
        same_series(fetch(df['c5']), data['c5'])

        # list of labels -> frame (ordered, reversed, single, repeated)
        same_frame(fetch(df[['c1', 'c2', 'c3']]), data[['c1', 'c2', 'c3']])
        same_frame(fetch(df[['c3', 'c2', 'c1']]), data[['c3', 'c2', 'c1']])
        same_frame(fetch(df[['c1']]), data[['c1']])
        same_frame(fetch(df[['c3', 'c1', 'c2', 'c1']]),
                   data[['c3', 'c1', 'c2', 'c1']])

        # labels given as a numpy array
        same_frame(fetch(df[np.array(['c1', 'c2', 'c3'])]),
                   data[['c1', 'c2', 'c3']])

        same_frame(fetch(df[['c3', 'c2', 'c1']]), data[['c3', 'c2', 'c1']])

        # row slice with a step
        same_frame(fetch(df[1:7:2]), data[1:7:2])

        # element of a selected column
        self.assertEqual(fetch(df['c1'][0]), data['c1'][0])

        # positional and label slices on the datetime-indexed frame
        same_frame(fetch(mdf[3:7]), data2[3:7])
        same_frame(fetch(mdf['2020-1-2':'2020-1-5']),
                   data2['2020-1-2':'2020-1-5'])

    def testDataFrameGetitemBool(self):
        """Compare boolean-mask selection on a chunked frame with pandas."""
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        df = md.DataFrame(data, chunk_size=2)

        def fetch(obj):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(obj, concat=True)[0]

        same_frame = pd.testing.assert_frame_equal
        ten_flags = [True, True, True, False, False,
                     True, True, False, False, True]

        mask_data = data.c1 > 0.5
        mask = md.Series(mask_data, chunk_size=2)

        # getitem by mars series
        self.assertEqual(fetch(df[mask]).shape, data[mask_data].shape)
        same_frame(fetch(df[mask]), data[mask_data])

        # getitem by pandas series
        same_frame(fetch(df[mask_data]), data[mask_data])

        # getitem by mars series with alignment but no shuffle
        mask_data = pd.Series(list(ten_flags), index=range(9, -1, -1))
        mask = md.Series(mask_data, chunk_size=2)
        same_frame(fetch(df[mask]), data[mask_data])

        # getitem by mars series with shuffle alignment
        mask_data = pd.Series(list(ten_flags),
                              index=[0, 3, 6, 2, 9, 8, 5, 7, 1, 4])
        mask = md.Series(mask_data, chunk_size=2)
        same_frame(fetch(df[mask]).sort_index(), data[mask_data])

        # getitem by mars series with shuffle alignment and extra element
        mask_data = pd.Series(list(ten_flags) + [False],
                              index=[0, 3, 6, 2, 9, 8, 5, 7, 1, 4, 10])
        mask = md.Series(mask_data, chunk_size=2)
        same_frame(fetch(df[mask]).sort_index(), data[mask_data])

        # getitem by DataFrame with all bool columns
        same_frame(fetch(df[df > 0.5]), data[data > 0.5])

    def testDataFrameGetitemUsingAttr(self):
        """Attribute-style column access must not shadow real attributes.

        Three of the columns are named ``key``, ``dtypes`` and ``size``,
        which are also attribute names read on the chunked frame below.
        """
        column_names = ['c1', 'c2', 'key', 'dtypes', 'size']
        data = pd.DataFrame(np.random.rand(10, 5), columns=column_names)
        df = md.DataFrame(data, chunk_size=2)

        # an ordinary column is reachable as an attribute
        c2_result = self.executor.execute_dataframe(df.c2, concat=True)[0]
        pd.testing.assert_series_equal(c2_result, data.c2)

        # accessing column using attribute shouldn't overwrite existing attributes
        inner = getattr(df, '_data')
        self.assertEqual(df.key, getattr(inner, '_key'))
        self.assertEqual(df.size, data.size)
        pd.testing.assert_series_equal(df.dtypes, data.dtypes)

        # accessing non-existing attributes should trigger exception
        with self.assertRaises(AttributeError):
            _ = df.zzz  # noqa: F841

    def testSeriesGetitem(self):
        """Compare ``series[...]`` scalar and list selection with pandas."""

        def fetch(obj):
            # execute ``obj`` and return the single concatenated result
            return self.executor.execute_dataframe(obj, concat=True)[0]

        same_series = pd.testing.assert_series_equal

        # series created without an explicit chunk size
        data = pd.Series(np.random.rand(10))
        series = md.Series(data)
        self.assertEqual(fetch(series[1]), data[1])

        # default integer index, chunked
        data = pd.Series(np.random.rand(10), name='a')
        series = md.Series(data, chunk_size=4)

        for pos in range(10):
            self.assertEqual(fetch(series[pos]), data[pos])

        # ascending, descending, and repeated positions
        same_series(fetch(series[[0, 1, 2, 3, 4]]), data[[0, 1, 2, 3, 4]])
        same_series(fetch(series[[4, 3, 2, 1, 0]]), data[[4, 3, 2, 1, 0]])
        same_series(fetch(series[[1, 2, 3, 2, 1, 0]]),
                    data[[1, 2, 3, 2, 1, 0]])

        # string index
        index = ['i' + str(i) for i in range(20)]
        data = pd.Series(np.random.rand(20), index=index, name='a')
        series = md.Series(data, chunk_size=3)

        for label in index:
            self.assertEqual(fetch(series[label]), data[label])

        # label lists: ordered, shuffled, with repeats, single element
        label_lists = (
            ['i1', 'i2', 'i3', 'i4', 'i5'],
            ['i4', 'i7', 'i0', 'i1', 'i5'],
            ['i0', 'i1', 'i5', 'i4', 'i0', 'i1'],
            ['i0'],
        )
        for selected in label_lists:
            same_series(fetch(series[selected]), data[selected])

    def testHead(self):
        """Check ``DataFrame.head`` against pandas for several row counts.

        The frame has 10 rows; the counts tried are the default, and positive
        and negative values both smaller and larger than the row count.
        """
        raw = pd.DataFrame(np.random.rand(10, 5),
                           columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        mdf = md.DataFrame(raw, chunk_size=2)

        # default row count
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(mdf.head(), concat=True)[0],
            raw.head())

        # explicit row counts
        for n in (3, -3, 8, -8, 13, -13):
            pd.testing.assert_frame_equal(
                self.executor.execute_dataframe(mdf.head(n), concat=True)[0],
                raw.head(n))

    def testTail(self):
        """Check ``DataFrame.tail`` against pandas for several row counts.

        The frame has 10 rows; the counts tried are the default, and positive
        and negative values both smaller and larger than the row count.
        """
        raw = pd.DataFrame(np.random.rand(10, 5),
                           columns=['c1', 'c2', 'c3', 'c4', 'c5'])
        mdf = md.DataFrame(raw, chunk_size=2)

        # default row count
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(mdf.tail(), concat=True)[0],
            raw.tail())

        # explicit row counts
        for n in (3, -3, 8, -8, 13, -13):
            pd.testing.assert_frame_equal(
                self.executor.execute_dataframe(mdf.tail(n), concat=True)[0],
                raw.tail(n))

    def testAt(self):
        """Check label-based scalar access through ``.at``.

        ``.at`` is exercised on a DataFrame (row label + column label) and on
        a Series taken from it (row label only). Passing a list of row labels
        is expected to raise ``ValueError``.
        """
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c' + str(i) for i in range(5)],
                            index=['i' + str(i) for i in range(10)])
        df = md.DataFrame(data, chunk_size=3)

        # a list of two existing row labels is not a valid scalar key
        with self.assertRaises(ValueError):
            _ = df.at[['i3', 'i4'], 'c1']

        result = self.executor.execute_dataframe(df.at['i3', 'c1'],
                                                 concat=True)[0]
        self.assertEqual(result, data.at['i3', 'c1'])

        result = self.executor.execute_dataframe(df['c1'].at['i2'],
                                                 concat=True)[0]
        self.assertEqual(result, data['c1'].at['i2'])

    def testIAt(self):
        """Check position-based scalar access through ``.iat``.

        ``.iat`` is exercised on a DataFrame and on a column selected with
        ``iloc``. Passing a list of row positions is expected to raise
        ``ValueError``.
        """
        col_labels = ['c' + str(i) for i in range(5)]
        row_labels = ['i' + str(i) for i in range(10)]
        raw = pd.DataFrame(np.random.rand(10, 5),
                           columns=col_labels,
                           index=row_labels)
        mdf = md.DataFrame(raw, chunk_size=3)

        # a list of row positions is not a valid scalar key
        with self.assertRaises(ValueError):
            _ = mdf.iat[[1, 2], 3]

        cell = self.executor.execute_dataframe(mdf.iat[3, 4], concat=True)[0]
        self.assertEqual(cell, raw.iat[3, 4])

        column_cell = self.executor.execute_dataframe(
            mdf.iloc[:, 2].iat[3], concat=True)[0]
        self.assertEqual(column_cell, raw.iloc[:, 2].iat[3])
Ejemplo n.º 13
0
class Test(TestBase):
    """Execution tests for the index-trick helpers ``nd_grid``, ``mt.r_`` and ``mt.c_``."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _assert_executes_to(self, mars_tensor, expected):
        """Execute ``mars_tensor`` with concat and compare it to ``expected``."""
        actual = self.executor.execute_tensor(mars_tensor, concat=True)[0]
        np.testing.assert_array_equal(actual, expected)

    def testIndexTricks(self):
        """Tile dense and sparse grids built through ``nd_grid`` indexing."""
        dense_grid = nd_grid()
        dense = dense_grid[0:5, 0:5]
        dense.tiles()  # being tileable means no loop exists

        sparse_grid = nd_grid(sparse=True)
        for part in sparse_grid[0:5, 0:5]:
            part.tiles()  # being tileable means no loop exists

    def testR_(self):
        """Compare ``mt.r_`` with ``np.r_`` for tensors, slices and directives."""
        # tensors mixed with scalars
        self._assert_executes_to(
            mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])],
            np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])])

        # complex-step slice mixed with a list and scalars
        self._assert_executes_to(mt.r_[-1:1:6j, [0] * 3, 5, 6],
                                 np.r_[-1:1:6j, [0] * 3, 5, 6])

        # complex-step slice on its own
        self._assert_executes_to(mt.r_[-1:1:6j], np.r_[-1:1:6j])

        # string directive selecting the axis, applied to chunked 2-d input
        nested = [[0, 1, 2], [3, 4, 5]]
        chunked = mt.array(nested, chunk_size=2)
        self._assert_executes_to(mt.r_['-1', chunked, chunked],
                                 np.r_['-1', nested, nested])

        # multi-part string directives
        for directive in ('0,2', '0,2,0', '1,2,0'):
            self._assert_executes_to(
                mt.r_[directive, [1, 2, 3], [4, 5, 6]],
                np.r_[directive, [1, 2, 3], [4, 5, 6]])

        self.assertEqual(len(mt.r_), 0)

        # a non-directive string after the first position is rejected
        with self.assertRaises(ValueError):
            _ = mt.r_[:3, 'wrong']

    def testC_(self):
        """Compare ``mt.c_`` with ``np.c_`` for tensors, scalars and slices."""
        self._assert_executes_to(
            mt.c_[mt.array([1, 2, 3]), mt.array([4, 5, 6])],
            np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])])

        self._assert_executes_to(
            mt.c_[mt.array([[1, 2, 3]]), 0, 0, mt.array([[4, 5, 6]])],
            np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])])

        self._assert_executes_to(mt.c_[:3, 1:4], np.c_[:3, 1:4])
Ejemplo n.º 14
0
class Test(TestBase):
    """Execution tests for storing tensors into TileDB arrays via ``totiledb``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        """Store tensors with ``totiledb`` and read them back through tiledb.

        Each case writes into its own temporary directory, which the context
        manager removes again whether or not the assertions pass.
        """
        ctx = tiledb.Ctx()

        # store TileDB dense array
        with tempfile.TemporaryDirectory() as tempdir:
            expected = np.random.rand(8, 4, 3)
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())

        # store tensor with 1 chunk to TileDB dense array
        with tempfile.TemporaryDirectory() as tempdir:
            a = arange(12)
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(np.arange(12), arr.read_direct())

        # store 2-d TileDB sparse array
        with tempfile.TemporaryDirectory() as tempdir:
            expected = sps.random(8, 7, density=0.1)
            a = tensor(expected, chunk_size=(3, 5))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.SparseArray(uri=tempdir, ctx=ctx) as arr:
                data = arr[:, :]
                coords = data['coords']
                value = data[arr.attr(0).name]
                # rebuild a COO matrix from the per-dimension coordinates
                ij = tuple(coords[arr.domain.dim(k).name]
                           for k in range(arr.ndim))
                result = sps.coo_matrix((value, ij), shape=arr.shape)

                np.testing.assert_allclose(expected.toarray(),
                                           result.toarray())

        # store Fortran-ordered data to a TileDB dense array
        with tempfile.TemporaryDirectory() as tempdir:
            expected = np.asfortranarray(np.random.rand(8, 4, 3))
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
                self.assertEqual(arr.schema.cell_order, 'col-major')
Ejemplo n.º 15
0
class Test(unittest.TestCase):
    """Tests for ``euclidean_distances``: operand handling and execution."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testEuclideanDistancesOp(self):
        """Check how precomputed norms end up on the op, and shape validation."""
        x = mt.random.rand(10, 3)
        xx = mt.random.rand(1, 10)
        y = mt.random.rand(11, 3)

        # the op's x norm has the same key as the transposed, checked input
        dist = euclidean_distances(x, X_norm_squared=xx)
        self.assertEqual(dist.op.x_norm_squared.key, check_array(xx).T.key)

        # norms passed as float32 end up unset on the op
        x_norm32 = mt.random.rand(10, 1, dtype=mt.float32)
        y_norm32 = mt.random.rand(1, 11, dtype=mt.float32)
        dist = euclidean_distances(x,
                                   y,
                                   X_norm_squared=x_norm32,
                                   Y_norm_squared=y_norm32)
        self.assertIsNone(dist.op.x_norm_squared)
        self.assertIsNone(dist.op.y_norm_squared)

        # XX shape incompatible
        with self.assertRaises(ValueError):
            euclidean_distances(x, X_norm_squared=mt.random.rand(10))

        # XX shape incompatible
        with self.assertRaises(ValueError):
            euclidean_distances(x, X_norm_squared=mt.random.rand(11, 1))

        # YY shape incompatible
        with self.assertRaises(ValueError):
            euclidean_distances(x, y, Y_norm_squared=mt.random.rand(10))

    def testEuclideanDistancesExecution(self):
        """Compare executed distances with scikit-learn for dense and sparse input."""
        def run(t):
            # execute and return the single concatenated result
            return self.executor.execute_tensor(t, concat=True)[0]

        dense_raw_x = np.random.rand(30, 10)
        dense_raw_y = np.random.rand(40, 10)
        sparse_raw_x = SparseNDArray(
            sps.random(30, 10, density=0.5, format='csr'))
        sparse_raw_y = SparseNDArray(
            sps.random(40, 10, density=0.5, format='csr'))

        cases = [(dense_raw_x, dense_raw_y), (sparse_raw_x, sparse_raw_y)]
        for raw_x, raw_y in cases:
            x = mt.tensor(raw_x, chunk_size=9)
            y = mt.tensor(raw_y, chunk_size=7)

            # plain x-vs-y distances
            dist = euclidean_distances(x, y)
            np.testing.assert_almost_equal(
                run(dist), sk_euclidean_distances(raw_x, Y=raw_y))

            # with norms supplied by the caller
            x_norm = x.sum(axis=1)[..., np.newaxis]
            y_norm = y.sum(axis=1)[np.newaxis, ...]
            dist = euclidean_distances(x,
                                       y,
                                       X_norm_squared=x_norm,
                                       Y_norm_squared=y_norm)
            x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
            y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]
            actual = run(dist)
            wanted = sk_euclidean_distances(raw_x,
                                            raw_y,
                                            X_norm_squared=x_raw_norm,
                                            Y_norm_squared=y_raw_norm)
            np.testing.assert_almost_equal(actual, wanted)

            # float32 inputs, squared distances, looser tolerance
            x_sq = (x**2).astype(np.float32)
            y_sq = (y**2).astype(np.float32)
            dist = euclidean_distances(x_sq, y_sq, squared=True)
            x_raw_sq = (raw_x**2).astype(np.float32)
            y_raw_sq = (raw_y**2).astype(np.float32)
            actual = run(dist)
            wanted = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
            np.testing.assert_almost_equal(actual, wanted, decimal=6)

            # test x is y
            dist = euclidean_distances(x)
            np.testing.assert_almost_equal(run(dist),
                                           sk_euclidean_distances(raw_x))
Ejemplo n.º 16
0
class Test(TestBase):
    """Execution tests for Mars DataFrame/Series data sources.

    Covers construction from pandas objects, tensors and record arrays, as
    well as reading from CSV files and SQL tables.
    """

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def _execute_concat(self, tileable):
        """Execute a DataFrame/Series object and return its concatenated result."""
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    def testFromPandasDataFrameExecution(self):
        """Round-trip a pandas DataFrame with a two-array index via ``from_pandas_df``."""
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20),
                                  np.arange(20, 0, -1)])
        df = from_pandas_df(pdf, chunk_size=(13, 21))

        result = self._execute_concat(df)
        pd.testing.assert_frame_equal(pdf, result)

    def testFromPandasSeriesExecution(self):
        """Round-trip a pandas Series with a two-array index via ``from_pandas_series``."""
        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20),
                              np.arange(20, 0, -1)],
                       name='a')
        series = from_pandas_series(ps, chunk_size=13)

        result = self._execute_concat(series)
        pd.testing.assert_series_equal(ps, result)

    def testInitializerExecution(self):
        """Round-trip pandas objects through the ``md.DataFrame``/``md.Series`` constructors."""
        pdf = pd.DataFrame(np.random.rand(20, 30),
                           index=[np.arange(20),
                                  np.arange(20, 0, -1)])
        df = md.DataFrame(pdf, chunk_size=(15, 10))
        result = self._execute_concat(df)
        pd.testing.assert_frame_equal(pdf, result)

        ps = pd.Series(np.random.rand(20),
                       index=[np.arange(20),
                              np.arange(20, 0, -1)],
                       name='a')
        series = md.Series(ps, chunk_size=7)
        result = self._execute_concat(series)
        pd.testing.assert_series_equal(ps, result)

    def testSeriesFromTensor(self):
        """Build ``md.Series`` from tensors, optionally with a name and a tensor index."""
        data = np.random.rand(10)
        series = md.Series(mt.tensor(data), name='a')
        pd.testing.assert_series_equal(self._execute_concat(series),
                                       pd.Series(data, name='a'))

        series = md.Series(mt.tensor(data, chunk_size=3))
        pd.testing.assert_series_equal(self._execute_concat(series),
                                       pd.Series(data))

        series = md.Series(mt.ones((10, ), chunk_size=4))
        pd.testing.assert_series_equal(self._execute_concat(series),
                                       pd.Series(np.ones(10, )))

        # data and index are chunked differently (3 vs 4)
        index_data = np.random.rand(10)
        series = md.Series(mt.tensor(data, chunk_size=3),
                           name='a',
                           index=mt.tensor(index_data, chunk_size=4))
        pd.testing.assert_series_equal(
            self._execute_concat(series),
            pd.Series(data, name='a', index=index_data))

    def testFromTensorExecution(self):
        """Build DataFrames from 2-d and 1-d tensors with various index/columns."""
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
        pdf_expected = pd.DataFrame(tensor_res)
        df_result = self._execute_concat(df)
        pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(df_result, pdf_expected)

        # test converted with specified index_value and columns
        tensor2 = mt.random.rand(2, 2, chunk_size=1)
        df2 = dataframe_from_tensor(tensor2,
                                    index=pd.Index(['a', 'b']),
                                    columns=pd.Index([3, 4]))
        df_result = self._execute_concat(df2)
        pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        tensor3 = mt.array([1, 2, 3])
        df3 = dataframe_from_tensor(tensor3)
        result3 = self._execute_concat(df3)
        pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(pdf_expected, result3)

        # test converted from identical chunks
        tensor4 = mt.ones((10, 10), chunk_size=3)
        df4 = dataframe_from_tensor(tensor4)
        result4 = self._execute_concat(df4)
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor4, concat=True)[0])
        pd.testing.assert_frame_equal(pdf_expected, result4)

        # from tensor with given index
        tensor5 = mt.ones((10, 10), chunk_size=3)
        df5 = dataframe_from_tensor(tensor5, index=np.arange(0, 20, 2))
        result5 = self._execute_concat(df5)
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor5, concat=True)[0],
            index=np.arange(0, 20, 2))
        pd.testing.assert_frame_equal(pdf_expected, result5)

        # from tensor with given index that is a tensor
        raw7 = np.random.rand(10, 10)
        tensor7 = mt.tensor(raw7, chunk_size=3)
        index_raw7 = np.random.rand(10)
        index7 = mt.tensor(index_raw7, chunk_size=4)
        df7 = dataframe_from_tensor(tensor7, index=index7)
        result7 = self._execute_concat(df7)
        pdf_expected = pd.DataFrame(raw7, index=index_raw7)
        pd.testing.assert_frame_equal(pdf_expected, result7)

        # from tensor with given columns
        tensor6 = mt.ones((10, 10), chunk_size=3)
        df6 = dataframe_from_tensor(tensor6, columns=list('abcdefghij'))
        result6 = self._execute_concat(df6)
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor6, concat=True)[0],
            columns=list('abcdefghij'))
        pd.testing.assert_frame_equal(pdf_expected, result6)

        # from 1d tensors
        raws8 = [
            ('a', np.random.rand(8)),
            ('b', np.random.randint(10, size=8)),
            ('c', [
                ''.join(np.random.choice(list(printable), size=6))
                for _ in range(8)
            ]),
        ]
        tensors8 = [mt.tensor(r[1], chunk_size=3) for r in raws8]
        df8 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8])
        result = self._execute_concat(df8)
        pdf_expected = pd.DataFrame(OrderedDict(raws8))
        pd.testing.assert_frame_equal(result, pdf_expected)

        # from 1d tensors and specify index with a tensor
        index_raw9 = np.random.rand(8)
        index9 = mt.tensor(index_raw9, chunk_size=4)
        df9 = dataframe_from_1d_tensors(tensors8,
                                        columns=[r[0] for r in raws8],
                                        index=index9)
        result = self._execute_concat(df9)
        pdf_expected = pd.DataFrame(OrderedDict(raws8), index=index_raw9)
        pd.testing.assert_frame_equal(result, pdf_expected)

    def testFromRecordsExecution(self):
        """Build DataFrames from structured arrays held by Mars and by numpy."""
        dtype = np.dtype([('x', 'int'), ('y', 'double'), ('z', '<U16')])

        ndarr = np.ones((10, ), dtype=dtype)
        pdf_expected = pd.DataFrame.from_records(ndarr,
                                                 index=pd.RangeIndex(10))

        # from structured array of mars
        tensor = mt.ones((10, ), dtype=dtype, chunk_size=3)
        df1 = from_records(tensor)
        df1_result = self._execute_concat(df1)
        pd.testing.assert_frame_equal(df1_result, pdf_expected)

        # from structured array of numpy
        df2 = from_records(ndarr)
        df2_result = self._execute_concat(df2)
        pd.testing.assert_frame_equal(df2_result, pdf_expected)

    def testReadCSVExecution(self):
        """Compare ``md.read_csv`` with ``pd.read_csv`` over several file layouts.

        Every scenario writes its files into a fresh temporary directory that
        the context manager removes afterwards. Each file is read once with
        default chunking and once with an explicit ``chunk_bytes``.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self._execute_concat(md.read_csv(file_path, index_col=0))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_path, index_col=0, chunk_bytes=10))
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test sep
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path, sep=';')

            pdf = pd.read_csv(file_path, sep=';', index_col=0)
            mdf = self._execute_concat(
                md.read_csv(file_path, sep=';', index_col=0))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_path, sep=';', index_col=0, chunk_bytes=10))
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test missing value
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame({
                'c1': [np.nan, 'a', 'b', 'c'],
                'c2': [1, 2, 3, np.nan],
                'c3': [np.nan, np.nan, 3.4, 2.2]
            })
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self._execute_concat(md.read_csv(file_path, index_col=0))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_path, index_col=0, chunk_bytes=12))
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test date index with mixed column types
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = self._execute_concat(md.read_csv(file_path, index_col=0))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_path, index_col=0, chunk_bytes=100))
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test compression
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.gzip')
            index = pd.date_range(start='1/1/2018', periods=100)
            df = pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                    'col3': np.arange(100)
                },
                index=index)
            df.to_csv(file_path, compression='gzip')

            pdf = pd.read_csv(file_path, compression='gzip', index_col=0)
            mdf = self._execute_concat(
                md.read_csv(file_path, compression='gzip', index_col=0))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_path,
                            compression='gzip',
                            index_col=0,
                            chunk_bytes='1k'))
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test multiple files
        with tempfile.TemporaryDirectory() as tempdir:
            df = pd.DataFrame(np.random.rand(300, 3), columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i)) for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            mdf = self._execute_concat(md.read_csv(file_paths, index_col=0))
            pd.testing.assert_frame_equal(df, mdf)

            mdf2 = self._execute_concat(
                md.read_csv(file_paths, index_col=0, chunk_bytes=50))
            pd.testing.assert_frame_equal(df, mdf2)

        # test wildcards in path
        with tempfile.TemporaryDirectory() as tempdir:
            df = pd.DataFrame(np.random.rand(300, 3), columns=['a', 'b', 'c'])

            file_paths = [
                os.path.join(tempdir, 'test{}.csv'.format(i)) for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])

            # As we can not guarantee the order in which these files are processed,
            # the result may not keep the original order.
            mdf = self._execute_concat(
                md.read_csv('{}/*.csv'.format(tempdir), index_col=0))
            pd.testing.assert_frame_equal(df, mdf.sort_index())

            mdf2 = self._execute_concat(
                md.read_csv('{}/*.csv'.format(tempdir),
                            index_col=0,
                            chunk_bytes=50))
            pd.testing.assert_frame_equal(df, mdf2.sort_index())

    @require_cudf
    def testReadCSVGPUExecution(self):
        """Compare ``md.read_csv(..., gpu=True)`` with ``pd.read_csv``.

        Results are converted with ``to_pandas()`` and both sides have their
        index reset before comparison.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame({
                'col1': np.random.rand(100),
                'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                'col3': np.arange(100)
            })
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)
            mdf = self._execute_concat(md.read_csv(file_path, gpu=True))
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf.to_pandas().reset_index(drop=True))

            mdf2 = self._execute_concat(
                md.read_csv(file_path, gpu=True, chunk_bytes=200))
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf2.to_pandas().reset_index(drop=True))

    def testReadCSVWithoutIndex(self):
        """Read a CSV written without an index column, using ``sort_range_index=True``."""
        sess = new_session()

        # test csv file without storing index
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                              columns=['a', 'b', 'c'])
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)
            mdf = sess.run(md.read_csv(file_path, sort_range_index=True))
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = sess.run(
                md.read_csv(file_path, sort_range_index=True, chunk_bytes=10))
            pd.testing.assert_frame_equal(pdf, mdf2)

    def testReadSQLTableExecution(self):
        """Read SQLite tables with ``md.read_sql_table`` and compare with the source frame.

        The table is addressed by name, by connection, by engine and by a
        reflected ``sa.Table``; ``index_col``/``columns`` selection and a
        primary-key index are covered as well.
        """
        import sqlalchemy as sa

        test_df = pd.DataFrame({
            'a': np.arange(10).astype(np.int64, copy=False),
            'b': ['s%d' % i for i in range(10)],
            'c': np.random.rand(10)
        })

        with tempfile.TemporaryDirectory() as d:
            table_name = 'test'
            table_name2 = 'test2'
            uri = 'sqlite:///' + os.path.join(d, 'test.db')

            test_df.to_sql(table_name, uri, index=False)

            r = md.read_sql_table('test', uri, chunk_size=4)
            result = self._execute_concat(r)
            pd.testing.assert_frame_equal(result, test_df)

            engine = sa.create_engine(uri)
            m = sa.MetaData()

            try:
                # test index_col and columns
                r = md.read_sql_table('test',
                                      engine.connect(),
                                      chunk_size=4,
                                      index_col='a',
                                      columns=['b'])
                result = self._execute_concat(r)
                expected = test_df.copy(deep=True)
                expected.set_index('a', inplace=True)
                del expected['c']
                pd.testing.assert_frame_equal(result, expected)

                # do not specify chunk_size
                r = md.read_sql_table('test',
                                      engine.connect(),
                                      index_col='a',
                                      columns=['b'])
                result = self._execute_concat(r)
                pd.testing.assert_frame_equal(result, expected)

                # autoload_with alone triggers reflection; the separate
                # ``autoload`` flag is deprecated in SQLAlchemy 1.4
                table = sa.Table(table_name, m, autoload_with=engine)
                r = md.read_sql_table(
                    table,
                    engine,
                    chunk_size=4,
                    index_col=[table.columns['a'], table.columns['b']],
                    columns=[table.columns['c']])
                result = self._execute_concat(r)
                expected = test_df.copy(deep=True)
                expected.set_index(['a', 'b'], inplace=True)
                pd.testing.assert_frame_equal(result, expected)

                # test primary key
                sa.Table(table_name2, m,
                         sa.Column('id', sa.Integer, primary_key=True),
                         sa.Column('a', sa.Integer), sa.Column('b', sa.String),
                         sa.Column('c', sa.Float))
                m.create_all(engine)
                test_df = test_df.copy(deep=True)
                test_df.index.name = 'id'
                test_df.to_sql(table_name2, uri, if_exists='append')

                r = md.read_sql_table(table_name2,
                                      engine,
                                      chunk_size=4,
                                      index_col='id')
                result = self._execute_concat(r)
                pd.testing.assert_frame_equal(result, test_df)
            finally:
                engine.dispose()
Ejemplo n.º 17
0
class TestUnary(TestBase):
    """Execution tests for unary operations on chunked DataFrame/Series objects."""

    def setUp(self):
        """Run the base fixture set-up, then attach a fresh test executor."""
        super().setUp()
        self.executor = ExecutorForTest()

    def testAbs(self):
        """Both ``df.abs()`` and the builtin ``abs(df)`` must match pandas."""
        pdf = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        chunked = from_pandas(pdf, chunk_size=5)

        via_method = self.executor.execute_dataframe(chunked.abs(), concat=True)[0]
        reference = pdf.abs()
        pd.testing.assert_frame_equal(reference, via_method)

        via_builtin = self.executor.execute_dataframe(abs(chunked), concat=True)[0]
        pd.testing.assert_frame_equal(reference, via_builtin)

    def testNot(self):
        """``~`` on a boolean frame must match ``~`` on the pandas frame."""
        flags = np.random.uniform(low=-1, high=1, size=(10, 10)) > 0
        pdf = pd.DataFrame(flags)
        chunked = from_pandas(pdf, chunk_size=5)

        inverted = self.executor.execute_dataframe(~chunked, concat=True)[0]
        reference = ~pdf
        pd.testing.assert_frame_equal(reference, inverted)

    def testUfunc(self):
        """Apply each ufunc pair to a frame and a series and compare with pandas.

        Every ufunc is exercised twice: once through its ``mt`` counterpart and
        once by calling the numpy function directly on the chunked object.
        """
        df_raw = pd.DataFrame(np.random.uniform(size=(10, 10)),
                              index=pd.RangeIndex(9, -1, -1))
        df = from_pandas(df_raw, chunk_size=5)

        series_raw = pd.Series(np.random.uniform(size=10),
                               index=pd.RangeIndex(9, -1, -1))
        series = from_pandas_series(series_raw, chunk_size=5)

        def pairs_for(names):
            # Look up the same-named function in numpy and in ``mt``.
            return [(getattr(np, name), getattr(mt, name)) for name in names]

        func_pairs = pairs_for((
            'abs', 'log', 'log2', 'log10', 'sin', 'cos', 'tan',
            'sinh', 'cosh', 'tanh', 'arcsin', 'arccos', 'arctan',
            'arcsinh', 'arccosh', 'arctanh', 'radians', 'degrees',
            'ceil', 'floor',
        ))
        # ``around`` needs its ``decimals`` argument bound on both sides.
        func_pairs.append((partial(np.around, decimals=2),
                           partial(mt.around, decimals=2)))
        func_pairs.extend(pairs_for(('exp', 'exp2', 'expm1', 'sqrt')))

        for pandas_obj, chunked_obj in ((df_raw, df), (series_raw, series)):
            # Pick the comparison helper that matches the container type.
            if isinstance(pandas_obj, pd.DataFrame):
                check_equal = pd.testing.assert_frame_equal
            else:
                check_equal = pd.testing.assert_series_equal

            for numpy_func, mt_func in func_pairs:
                via_mt = self.executor.execute_tensor(
                    mt_func(chunked_obj), concat=True)[0]
                reference = numpy_func(pandas_obj)
                check_equal(via_mt, reference)

                # The numpy function called on the chunked object must agree too.
                via_numpy = self.executor.execute_tensor(
                    numpy_func(chunked_obj), concat=True)[0]
                check_equal(via_numpy, reference)
Ejemplo n.º 18
0
class Test(unittest.TestCase):
    def setUp(self):
        """Create the ``'numpy'`` test executor shared by the test methods."""
        numpy_executor = ExecutorForTest('numpy')
        self.executor = numpy_executor

    def testSumProdExecution(self):
        """Execute ``sum``/``prod`` on dense, sparse, F-ordered and object tensors."""
        run = self.executor.execute_tensor

        all_ones = ones((10, 8), chunk_size=3)
        self.assertEqual([80], run(all_ones.sum()))
        column_sums = np.concatenate(run(all_ones.sum(axis=0)))
        self.assertEqual((10,) * 8, tuple(column_sums))

        all_ones = ones((3, 3), chunk_size=2)
        self.assertEqual([512], run((all_ones * 2).prod()))
        column_prods = np.concatenate(run((all_ones * 2).prod(axis=0)))
        self.assertEqual((8,) * 3, tuple(column_prods))

        sparse_raw = sps.random(10, 20, density=.1)
        sparse_total = run(tensor(sparse_raw, chunk_size=3).sum())[0]

        self.assertAlmostEqual(sparse_total, sparse_raw.sum())

        # Reducing a Fortran-ordered input must yield the same contiguity
        # flags as the equivalent numpy reduction.
        fortran_raw = np.asfortranarray(np.random.rand(10, 20, 30))
        reduced = tensor(fortran_raw, chunk_size=13).sum(axis=-1)

        got = run(reduced, concat=True)[0]
        want = fortran_raw.sum(axis=-1)
        np.testing.assert_allclose(got, want)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], want.flags[flag])

        # Object-dtype strings: the sum is the concatenated string, both with
        # the default chunking and with an explicit chunk size.
        for extra in ({}, {'chunk_size': 2}):
            letters = tensor(list('abcdefghi'), dtype=object, **extra)
            self.assertEqual(run(letters.sum(), concat=True)[0], 'abcdefghi')

    def testMaxMinExecution(self):
        """Execute ``max``/``min`` on dense, sparse and object-dtype tensors."""
        run = self.executor.execute_tensor

        dense_raw = np.random.randint(10000, size=(10, 10, 10))
        dense = tensor(dense_raw, chunk_size=3)

        self.assertEqual([dense_raw.max()], run(dense.max()))
        self.assertEqual([dense_raw.min()], run(dense.min()))

        want = dense_raw.max(axis=0)
        np.testing.assert_array_equal(want, run(dense.max(axis=0), concat=True)[0])
        # A dense input must not be reported as producing a sparse result.
        self.assertFalse(dense.max(axis=0).issparse())
        want = dense_raw.min(axis=0)
        np.testing.assert_array_equal(want, run(dense.min(axis=0), concat=True)[0])
        self.assertFalse(dense.min(axis=0).issparse())

        both_axes = (1, 2)
        want = dense_raw.max(axis=both_axes)
        np.testing.assert_array_equal(
            want, run(dense.max(axis=both_axes), concat=True)[0])
        want = dense_raw.min(axis=both_axes)
        np.testing.assert_array_equal(
            want, run(dense.min(axis=both_axes), concat=True)[0])

        sparse_raw = sps.random(10, 10, density=.5)
        sparse = tensor(sparse_raw, chunk_size=3)

        self.assertEqual([sparse_raw.max()], run(sparse.max()))
        self.assertEqual([sparse_raw.min()], run(sparse.min()))

        want = sparse_raw.max(axis=1).A.ravel()
        got = run(sparse.max(axis=1), concat=True)[0].toarray()
        np.testing.assert_almost_equal(want, got)
        # An axis reduction of a sparse input is expected to stay sparse.
        self.assertTrue(sparse.max(axis=1).issparse())
        want = sparse_raw.min(axis=1).A.ravel()
        got = run(sparse.min(axis=1), concat=True)[0].toarray()
        np.testing.assert_almost_equal(want, got)
        self.assertTrue(sparse.min(axis=1).issparse())

        # Object-dtype strings: the maximum is 'i', with and without an
        # explicit chunk size.
        for extra in ({}, {'chunk_size': 2}):
            letters = tensor(list('abcdefghi'), dtype=object, **extra)
            self.assertEqual(run(letters.max(), concat=True)[0], 'i')

    def testAllAnyExecution(self):
        """Execute ``all``/``any`` on dense, boolean, sparse and object tensors.

        The dense cases use an all-zeros tensor (``arr1``), an all-ones tensor
        (``arr2``) and a small boolean matrix (``arr3``) reduced along an axis.
        """
        raw1 = np.zeros((10, 15))
        raw2 = np.ones((10, 15))
        raw3 = np.array([[True, False, True, False], [True, True, True, True],
                         [False, False, False, False], [False, True, False, True]])

        arr1 = tensor(raw1, chunk_size=3)
        arr2 = tensor(raw2, chunk_size=3)
        arr3 = tensor(raw3, chunk_size=4)

        self.assertFalse(self.executor.execute_tensor(arr1.all())[0])
        self.assertTrue(self.executor.execute_tensor(arr2.all())[0])
        self.assertFalse(self.executor.execute_tensor(arr1.any())[0])
        # Check the first result element of the all-ones tensor.  Asserting on
        # the un-indexed result of ``arr1.any()`` passed for any non-empty
        # result sequence and contradicted the assertion directly above.
        self.assertTrue(self.executor.execute_tensor(arr2.any())[0])
        np.testing.assert_array_equal(raw3.all(axis=1),
                                      self.executor.execute_tensor(arr3.all(axis=1))[0])
        np.testing.assert_array_equal(raw3.any(axis=0),
                                      self.executor.execute_tensor(arr3.any(axis=0))[0])

        # Sparse boolean input, compared against the densified matrix.
        raw = sps.random(10, 10, density=.5) > .5

        arr = tensor(raw, chunk_size=3)

        self.assertEqual(raw.A.all(), self.executor.execute_tensor(arr.all())[0])
        self.assertEqual(raw.A.any(), self.executor.execute_tensor(arr.any())[0])

        # test string dtype: the expected values are elements of the input
        # ('i' for all, 'a' for any) rather than booleans.
        a = tensor(list('abcdefghi'), dtype=object)
        self.assertEqual(self.executor.execute_tensor(a.all(), concat=True)[0], 'i')
        a = tensor(list('abcdefghi'), dtype=object, chunk_size=2)
        self.assertEqual(self.executor.execute_tensor(a.any(), concat=True)[0], 'a')

    def testMeanExecution(self):
        """Execute ``mean`` on float, integer, sparse, scalar and object inputs."""
        run = self.executor.execute_tensor
        axis_variants = ({'axis': 0}, {'axis': 1, 'keepdims': True})

        float_raw = np.random.random((20, 25))
        int_raw = np.random.randint(10, size=(20, 25))

        float_t = tensor(float_raw, chunk_size=3)

        overall = run(float_t.mean())
        self.assertTrue(np.allclose(overall[0], float_raw.mean()))

        for kwargs in axis_variants:
            pieces = run(float_t.mean(**kwargs))
            want = float_raw.mean(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        int_t = tensor(int_raw, chunk_size=3)

        overall = run(int_t.mean())
        self.assertEqual(overall[0], int_raw.mean())

        for kwargs in axis_variants:
            pieces = run(int_t.mean(**kwargs))
            want = int_raw.mean(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        # Sparse input, once split into many chunks and once with a chunk
        # size larger than either dimension.
        sparse_raw = sps.random(20, 25, density=.1)
        for chunk_size in (3, 30):
            sparse_t = tensor(sparse_raw, chunk_size=chunk_size)
            overall = run(sparse_t.mean())
            want = sparse_raw.mean()
            self.assertTrue(np.allclose(overall[0], want))

        # The module-level function applied to a plain scalar.
        scalar_mean = mean(1)
        self.assertEqual(run(scalar_mean)[0], 1)

        # An object-dtype tensor of strings is expected to be rejected.
        with self.assertRaises(TypeError):
            run(tensor(list('abcdefghi'), dtype=object).mean())

    def testVarExecution(self):
        """Execute ``var`` on float, integer, sparse and scalar inputs."""
        run = self.executor.execute_tensor
        axis_variants = ({'axis': 0}, {'axis': 1, 'keepdims': True})

        float_raw = np.random.random((20, 25))
        int_raw = np.random.randint(10, size=(20, 25))

        # Chunk size 25 versus chunk size 3 on the same float data.
        coarse_t = tensor(float_raw, chunk_size=25)

        overall = run(coarse_t.var())
        self.assertTrue(np.allclose(overall[0], float_raw.var()))

        float_t = tensor(float_raw, chunk_size=3)

        overall = run(float_t.var())
        self.assertTrue(np.allclose(overall[0], float_raw.var()))

        for kwargs in axis_variants:
            pieces = run(float_t.var(**kwargs))
            want = float_raw.var(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        int_t = tensor(int_raw, chunk_size=3)

        overall = run(int_t.var())
        self.assertAlmostEqual(overall[0], int_raw.var())

        for kwargs in axis_variants:
            pieces = run(int_t.var(**kwargs))
            want = int_raw.var(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        with_ddof = run(int_t.var(ddof=1))
        self.assertAlmostEqual(with_ddof[0], int_raw.var(ddof=1))

        # Sparse input is compared against the variance of its dense copy.
        sparse_raw = sps.random(20, 25, density=.1)
        for chunk_size in (3, 30):
            sparse_t = tensor(sparse_raw, chunk_size=chunk_size)
            overall = run(sparse_t.var())
            want = sparse_raw.toarray().var()
            self.assertTrue(np.allclose(overall[0], want))

        # The module-level function applied to a plain scalar.
        scalar_var = var(1)
        self.assertEqual(run(scalar_var)[0], 0)

    def testStdExecution(self):
        """Execute ``std`` on float, integer, sparse and scalar inputs."""
        run = self.executor.execute_tensor
        axis_variants = ({'axis': 0}, {'axis': 1, 'keepdims': True})

        float_raw = np.random.random((20, 25))
        int_raw = np.random.randint(10, size=(20, 25))

        float_t = tensor(float_raw, chunk_size=3)

        overall = run(float_t.std())
        self.assertTrue(np.allclose(overall[0], float_raw.std()))

        for kwargs in axis_variants:
            pieces = run(float_t.std(**kwargs))
            want = float_raw.std(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        int_t = tensor(int_raw, chunk_size=3)

        overall = run(int_t.std())
        self.assertAlmostEqual(overall[0], int_raw.std())

        for kwargs in axis_variants:
            pieces = run(int_t.std(**kwargs))
            want = int_raw.std(**kwargs)
            self.assertTrue(np.allclose(np.concatenate(pieces), want))

        with_ddof = run(int_t.std(ddof=1))
        self.assertAlmostEqual(with_ddof[0], int_raw.std(ddof=1))

        # Sparse input is compared against the deviation of its dense copy.
        sparse_raw = sps.random(20, 25, density=.1)
        for chunk_size in (3, 30):
            sparse_t = tensor(sparse_raw, chunk_size=chunk_size)
            overall = run(sparse_t.std())
            want = sparse_raw.toarray().std()
            self.assertTrue(np.allclose(overall[0], want))

        # The module-level function applied to a plain scalar.
        scalar_std = std(1)
        self.assertEqual(run(scalar_std)[0], 0)

    def testArgReduction(self):
        """Execute ``argmax``/``argmin`` on dense, sparse and F-ordered tensors."""
        run = self.executor.execute_tensor

        dense_raw = np.random.random((20, 20, 20))
        dense = tensor(dense_raw, chunk_size=3)

        self.assertEqual(dense_raw.argmax(), run(dense.argmax())[0])
        self.assertEqual(dense_raw.argmin(), run(dense.argmin())[0])

        want = dense_raw.argmax(axis=0)
        np.testing.assert_array_equal(
            want, run(dense.argmax(axis=0), concat=True)[0])
        want = dense_raw.argmin(axis=0)
        np.testing.assert_array_equal(
            want, run(dense.argmin(axis=0), concat=True)[0])

        lil = sps.random(20, 20, density=.1, format='lil')

        # Write -1 and 2 at two flat positions drawn from disjoint ranges, so
        # they never collide (presumably both lie outside the generated values).
        min_position = np.random.randint(0, 200)
        max_position = np.random.randint(200, 400)
        lil[np.unravel_index(min_position, lil.shape)] = -1
        lil[np.unravel_index(max_position, lil.shape)] = 2

        coo = lil.tocoo()
        sparse = tensor(coo, chunk_size=3)

        self.assertEqual(coo.argmax(), run(sparse.argmax())[0])
        self.assertEqual(coo.argmin(), run(sparse.argmin())[0])

        # Fortran-ordered input: contiguity flags must match numpy's result.
        fortran_raw = np.asfortranarray(np.random.rand(10, 20, 30))
        reduced = tensor(fortran_raw, chunk_size=13).argmax(axis=-1)

        got = run(reduced, concat=True)[0]
        want = fortran_raw.argmax(axis=-1)
        np.testing.assert_allclose(got, want)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], want.flags[flag])

        # An object-dtype tensor of strings is expected to be rejected.
        with self.assertRaises(TypeError):
            run(tensor(list('abcdefghi'), dtype=object).argmax())

    @ignore_warning
    def testNanReduction(self):
        """Execute the nan-aware reductions on dense, all-NaN and sparse inputs."""
        run = self.executor.execute_tensor

        # (numpy reference, function under test) pairs, in checking order.
        plain_pairs = ((np.nansum, nansum), (np.nanprod, nanprod),
                       (np.nanmax, nanmax), (np.nanmin, nanmin),
                       (np.nanmean, nanmean))
        # These are additionally checked with ``ddof=1``.
        spread_pairs = ((np.nanvar, nanvar), (np.nanstd, nanstd))

        raw = np.random.choice(a=[0, 1, np.nan], size=(10, 10), p=[0.3, 0.4, 0.3])

        for chunk_size in (3, 10):
            arr = tensor(raw, chunk_size=chunk_size)

            for np_func, t_func in plain_pairs:
                self.assertEqual(np_func(raw), run(t_func(arr))[0])
            for np_func, t_func in spread_pairs:
                self.assertAlmostEqual(np_func(raw), run(t_func(arr))[0])
                self.assertAlmostEqual(np_func(raw, ddof=1),
                                       run(t_func(arr, ddof=1))[0])

        # Partially-NaN input for the arg-reductions.
        raw = np.random.random((10, 10))
        raw[:3, :3] = np.nan
        arr = tensor(raw, chunk_size=3)
        self.assertEqual(np.nanargmin(raw), run(nanargmin(arr))[0])
        self.assertEqual(np.nanargmax(raw), run(nanargmax(arr))[0])

        # All-NaN input.
        raw = np.full((10, 10), np.nan)
        arr = tensor(raw, chunk_size=3)

        self.assertEqual(0, run(nansum(arr))[0])
        self.assertEqual(1, run(nanprod(arr))[0])
        for t_func in (nanmax, nanmin, nanmean):
            self.assertTrue(np.isnan(run(t_func(arr))[0]))
        for t_func in (nanargmin, nanargmax):
            with self.assertRaises(ValueError):
                run(t_func(arr))[0]

        # Sparse input with a NaN block, compared against its dense form.
        raw = sps.random(10, 10, density=.1, format='csr')
        raw[:3, :3] = np.nan
        arr = tensor(raw, chunk_size=3)

        for np_func, t_func in plain_pairs:
            self.assertAlmostEqual(np_func(raw.A), run(t_func(arr))[0])
        for np_func, t_func in spread_pairs:
            self.assertAlmostEqual(np_func(raw.A), run(t_func(arr))[0])
            self.assertAlmostEqual(np_func(raw.A, ddof=1),
                                   run(t_func(arr, ddof=1))[0])

        # The module-level function applied to a plain scalar.
        scalar_sum = nansum(1)
        self.assertEqual(run(scalar_sum)[0], 1)

    def testCumReduction(self):
        """Execute ``cumsum``/``cumprod`` on dense, sparse, F-ordered and object tensors."""
        run = self.executor.execute_tensor

        int_raw = np.random.randint(5, size=(8, 8, 8))
        int_t = tensor(int_raw, chunk_size=3)

        sums = run(int_t.cumsum(axis=1), concat=True)
        prods = run(int_t.cumprod(axis=1), concat=True)
        np.testing.assert_array_equal(sums[0], int_raw.cumsum(axis=1))
        np.testing.assert_array_equal(prods[0], int_raw.cumprod(axis=1))

        # Sparse input is compared against the dense form of the matrix.
        sparse_raw = sps.random(8, 8, density=.1)
        sparse_t = tensor(sparse_raw, chunk_size=3)

        sums = run(sparse_t.cumsum(axis=1), concat=True)
        prods = run(sparse_t.cumprod(axis=1), concat=True)
        self.assertTrue(np.allclose(sums[0], sparse_raw.A.cumsum(axis=1)))
        self.assertTrue(np.allclose(prods[0], sparse_raw.A.cumprod(axis=1)))

        # Fortran-ordered input: contiguity flags must match numpy's result.
        fortran_raw = np.asfortranarray(np.random.rand(10, 20, 30))
        accumulated = tensor(fortran_raw, chunk_size=13).cumsum(axis=-1)

        got = run(accumulated, concat=True)[0]
        want = fortran_raw.cumsum(axis=-1)
        np.testing.assert_allclose(got, want)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], want.flags[flag])

        # Object-dtype strings, with and without an explicit chunk size.
        for extra in ({}, {'chunk_size': 2}):
            letters = tensor(list('abcdefghi'), dtype=object, **extra)
            np.testing.assert_array_equal(
                run(letters.cumsum(), concat=True)[0],
                np.cumsum(np.array(list('abcdefghi'), dtype=object)))

    def testNanCumReduction(self):
        """Execute ``nancumsum``/``nancumprod`` on dense and sparse NaN-bearing inputs.

        Results are compared with ``np.nancumsum``/``np.nancumprod`` on the same
        data (the dense form of the matrix in the sparse case).
        """
        # An integer array cannot store NaN, so convert the random integers to
        # float before planting the NaN block; otherwise the assignment below
        # fails or stores a garbage integer and the NaN path is never tested.
        raw = np.random.randint(5, size=(8, 8, 8)).astype(float)
        raw[:2, 2:4, 4:6] = np.nan

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(nancumsum(arr, axis=1), concat=True)
        res2 = self.executor.execute_tensor(nancumprod(arr, axis=1), concat=True)
        expected1 = np.nancumsum(raw, axis=1)
        expected2 = np.nancumprod(raw, axis=1)
        np.testing.assert_array_equal(res1[0], expected1)
        np.testing.assert_array_equal(res2[0], expected2)

        # Sparse (LIL) input with a NaN block.
        raw = sps.random(8, 8, density=.1, format='lil')
        raw[:2, 2:4] = np.nan

        arr = tensor(raw, chunk_size=3)

        res1 = self.executor.execute_tensor(nancumsum(arr, axis=1), concat=True)[0]
        res2 = self.executor.execute_tensor(nancumprod(arr, axis=1), concat=True)[0]
        expected1 = np.nancumsum(raw.A, axis=1)
        expected2 = np.nancumprod(raw.A, axis=1)
        self.assertTrue(np.allclose(res1, expected1))
        self.assertTrue(np.allclose(res2, expected2))

    def testOutReductionExecution(self):
        """A ``sum`` written through ``out=`` must leave the target holding the sum."""
        source_raw = np.random.randint(5, size=(8, 8, 8))

        source = tensor(source_raw, chunk_size=3)
        target = ones((8, 8), dtype='i8', chunk_size=3)
        # The return value is ignored on purpose; only ``target`` is executed.
        source.sum(axis=1, out=target)

        got = self.executor.execute_tensor(target, concat=True)[0]
        want = source_raw.sum(axis=1)

        np.testing.assert_array_equal(got, want)

    def testOutCumReductionExecution(self):
        """A ``cumsum`` whose ``out=`` is its own input must hold the cumulative sum."""
        source_raw = np.random.randint(5, size=(8, 8, 8))

        in_place = tensor(source_raw, chunk_size=3)
        # The tensor is both the input and the output of the reduction.
        in_place.cumsum(axis=0, out=in_place)

        got = self.executor.execute_tensor(in_place, concat=True)[0]
        want = source_raw.cumsum(axis=0)

        np.testing.assert_array_equal(got, want)

    def testCountNonzeroExecution(self):
        """Execute ``count_nonzero`` on dense, sparse and object-dtype tensors."""
        run = self.executor.execute_tensor
        nested = [[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]

        # Chunk size 5.
        whole = tensor(nested, chunk_size=5)
        np.testing.assert_equal(run(count_nonzero(whole))[0],
                                np.count_nonzero(nested))

        # Chunk size 2, total count and then per-axis counts.
        split = tensor(nested, chunk_size=2)
        np.testing.assert_equal(run(count_nonzero(split))[0],
                                np.count_nonzero(nested))

        for axis in (0, 1):
            got = run(count_nonzero(split, axis=axis), concat=True)[0]
            np.testing.assert_equal(got, np.count_nonzero(nested, axis=axis))

        # The same data as a CSR matrix, compared against its dense form.
        csr = sps.csr_matrix(nested)
        sparse_split = tensor(csr, chunk_size=2)

        np.testing.assert_equal(run(count_nonzero(sparse_split))[0],
                                np.count_nonzero(csr.A))

        for axis in (0, 1):
            got = run(count_nonzero(sparse_split, axis=axis), concat=True)[0]
            np.testing.assert_equal(got, np.count_nonzero(csr.A, axis=axis))

        # Object-dtype strings: all nine entries are counted, with and without
        # an explicit chunk size.
        for extra in ({}, {'chunk_size': 2}):
            letters = tensor(list('abcdefghi'), dtype=object, **extra)
            self.assertEqual(run(count_nonzero(letters), concat=True)[0], 9)

    def testAllcloseExecution(self):
        """Execute ``allclose`` on dense, NaN-bearing, sparse and object inputs."""
        run = self.executor.execute_tensor

        # (left data, right data, extra keyword arguments, assertion to apply)
        cases = (
            ([1e10, 1e-7], [1.00001e10, 1e-8], {}, self.assertFalse),
            ([1e10, 1e-8], [1.00001e10, 1e-9], {}, self.assertTrue),
            ([1.0, np.nan], [1.0, np.nan], {'equal_nan': True}, self.assertTrue),
            (sps.csr_matrix([[1e10, 1e-7], [0, 0]]),
             sps.csr_matrix([[1.00001e10, 1e-8], [0, 0]]),
             {}, self.assertFalse),
        )

        for left_data, right_data, options, check in cases:
            left = tensor(left_data, chunk_size=1)
            right = tensor(right_data, chunk_size=1)

            verdict = run(allclose(left, right, **options))[0]
            check(verdict)

        # An object-dtype tensor of strings is expected to be rejected.
        with self.assertRaises(TypeError):
            letters = tensor(list('abcdefghi'), dtype=object)
            run(allclose(letters, letters))

    def testArrayEqual(self):
        """Two all-ones tensors with different chunk sizes must compare equal."""
        fine_grained = ones((10, 5), chunk_size=1)
        coarse_grained = ones((10, 5), chunk_size=2)

        comparison = array_equal(fine_grained, coarse_grained)

        outcome = self.executor.execute_tensor(comparison)[0]
        self.assertTrue(bool(outcome))
Ejemplo n.º 19
0
class Test(unittest.TestCase):
    """Execution tests for the non-negativity and finiteness check helpers."""

    def setUp(self) -> None:
        """Create the ``'numpy'`` test executor shared by the test methods."""
        self.executor = ExecutorForTest('numpy')

    def testCheckNonNegativeThenReturnValueExecution(self):
        """Non-negative data is returned unchanged; a negative entry raises.

        The same pair of checks runs on a dense array, a CSR matrix and a
        DataFrame.
        """
        def run_check(chunked):
            # Build the check, passing the same object as both arguments.
            checked = check_non_negative_then_return_value(chunked, chunked, 'sth')
            return self.executor.execute_tileable(checked, concat=True)[0]

        def assert_rejected(chunked):
            # The check is built outside the context; only execution may raise.
            checked = check_non_negative_then_return_value(chunked, chunked, 'sth')
            with self.assertRaises(ValueError):
                self.executor.execute_tileable(checked, concat=True)[0]

        # Dense integer array.
        dense = np.random.randint(10, size=(10, 5))
        np.testing.assert_array_equal(
            run_check(mt.tensor(dense, chunk_size=(3, 2))), dense)

        dense = dense.copy()
        dense[1, 3] = -1
        assert_rejected(mt.tensor(dense, chunk_size=(3, 2)))

        # Sparse CSR matrix.
        sparse = sps.random(10, 5, density=.3, format='csr')
        np.testing.assert_array_equal(
            run_check(mt.tensor(sparse, chunk_size=(3, 2))).toarray(), sparse.A)

        sparse = sparse.copy()
        sparse[1, 3] = -1
        assert_rejected(mt.tensor(sparse, chunk_size=(3, 2)))

        # DataFrame.
        frame = pd.DataFrame(np.random.rand(10, 4))
        returned = run_check(md.DataFrame(frame, chunk_size=(3, 2)))

        pd.testing.assert_frame_equal(returned, frame)

        frame = frame.copy()
        frame.iloc[1, 3] = -1
        assert_rejected(md.DataFrame(frame, chunk_size=(3, 2)))

    def testAssertAllFinite(self):
        """Exercise ``assert_all_finite`` on inf, NaN, object, finite and sparse data."""
        run = self.executor.execute_tensor

        def assert_rejected(target, **options):
            # Building and executing the check both happen inside the context.
            with self.assertRaises(ValueError):
                check = assert_all_finite(target, **options)
                run(check)

        with_inf = mt.tensor(np.array([2.3, np.inf], dtype=np.float64))
        assert_rejected(with_inf)

        with_nan = mt.tensor(np.array([2.3, np.nan], dtype=np.float64))
        assert_rejected(with_nan, allow_nan=False)

        # Two copies of the float32 maximum: the guard below requires that
        # their numpy sum is not finite, yet the check must still pass.
        max_float32 = np.finfo(np.float32).max
        extremes = [max_float32] * 2
        self.assertFalse(np.isfinite(np.sum(extremes)))
        x = mt.tensor(extremes)

        verdict = run(assert_all_finite(x), concat=True)[0]
        self.assertTrue(verdict.item())

        # Object-dtype input mixing NaN and a string.
        mixed = mt.tensor(np.array([np.nan, 'a'], dtype=object))
        assert_rejected(mixed)

        finite_raw = np.random.rand(10)
        x = mt.tensor(finite_raw, chunk_size=2)

        # With ``check_only=False`` the executed result equals the input data.
        passthrough = run(assert_all_finite(x, check_only=False), concat=True)[0]
        np.testing.assert_array_equal(passthrough, finite_raw)

        verdict = run(assert_all_finite(x), concat=True)[0]
        self.assertTrue(verdict.item())

        # With ``assume_finite`` switched on, the helper returns ``None``, or
        # the input object itself when ``check_only=False``.
        with option_context() as options:
            options.learn.assume_finite = True

            self.assertIsNone(assert_all_finite(x))
            self.assertIs(assert_all_finite(x, check_only=False), x)

        # test sparse
        seeded = np.random.RandomState(0)
        sparse = sps.random(10, 3, density=0.1, format='csr', random_state=seeded)
        sparse[0, 2] = np.nan

        assert_rejected(sparse)
Ejemplo n.º 20
0
class Test(unittest.TestCase):
    """Execution tests for the ``tensor.random`` generators.

    Most tests share one pattern: execute a chunked random tensor and check
    the shape of the concatenated result, then build the same tensor again,
    force the seed of every chunk op to 0 and compare each executed chunk
    with what ``np.random.RandomState(0)`` draws for one chunk.
    """

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    # ------------------------------------------------------------------
    # shared helpers
    # ------------------------------------------------------------------

    def _assertExecutedShape(self, arr, shape):
        """Execute *arr* with ``concat=True`` and check the result's shape."""
        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(res.shape, shape)

    def _assertSeededChunksEqual(self, arr, expected):
        """Tile *arr*, seed every chunk op with 0 and compare all chunks.

        :param arr: untiled random tensor whose chunks all share one shape
        :param expected: the array every executed chunk must equal
        """
        tiled = arr.tiles()
        for chunk in tiled.chunks:
            chunk.op._seed = 0

        for res in self.executor.execute_tensor(tiled):
            # assert_array_equal reports the differing elements on failure
            np.testing.assert_array_equal(res, expected)

    def _assertSizedDistribution(self, name, *params, event_shape=()):
        """Run the shape check and the seeded-chunk check for one distribution.

        A 100-sample tensor split into chunks of 10 is executed; its shape
        must be ``(100,) + event_shape`` and, once seeded with 0, every chunk
        must equal a 10-sample draw from ``np.random.RandomState(0)``.

        :param name: attribute name looked up on both ``tensor.random`` and
            ``np.random.RandomState``
        :param params: distribution parameters passed before the size
        :param event_shape: trailing dimensions of one sample, if any
        """
        generate = getattr(tensor.random, name)
        self._assertExecutedShape(generate(*params, 100, chunk_size=10), (100,) + event_shape)

        expected = getattr(np.random.RandomState(0), name)(*params, 10)
        self._assertSeededChunksEqual(generate(*params, 100, chunk_size=10), expected)

    def _executeUntilShuffled(self, make_tensor, in_order, attempts=5):
        """Execute a freshly built permutation, redrawing if it is still in order.

        A random permutation legitimately comes back in the original order now
        and then (about 1 in 24 for four items, assuming a uniform shuffle),
        so asserting "is shuffled" on a single draw is flaky. Redrawing a few
        times makes a spurious failure negligible.
        NOTE(review): this relies on every ``make_tensor()`` call drawing a
        new seed -- confirm against ``tensor.random``.

        :param make_tensor: zero-argument callable building a new permutation
        :param in_order: predicate, true when a result is still in order
        :param attempts: maximum number of draws
        :return: the first shuffled result, or the last draw if none was
        """
        for _ in range(attempts):
            res = self.executor.execute_tensor(make_tensor(), concat=True)[0]
            if not in_order(res):
                break
        return res

    # ------------------------------------------------------------------
    # uniform / integer generators
    # ------------------------------------------------------------------

    def testRandExecution(self):
        arr = tensor.random.rand(10, 20, chunk_size=3, dtype='f4')
        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(res.shape, (10, 20))
        self.assertTrue(np.all(res < 1))
        self.assertTrue(np.all(res > 0))
        self.assertEqual(res.dtype, np.float32)

    def testRandnExecution(self):
        self._assertExecutedShape(tensor.random.randn(10, 20, chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(tensor.random.randn(10, 20, chunk_size=5),
                                      np.random.RandomState(0).randn(5, 5))

    def testRandintExecution(self):
        size_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(0, 2, size=(10, 30), chunk_size=3)
        size_res = size_executor.execute_tensor(arr, mock=True)
        self.assertEqual(arr.nbytes, sum(tp[0] for tp in size_res))

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(res.shape, (10, 30))
        self.assertTrue(np.all(res >= 0))
        self.assertTrue(np.all(res < 2))

    @ignore_warning
    def testRandomIntegersExecution(self):
        self._assertExecutedShape(
            tensor.random.random_integers(0, 10, size=(10, 20), chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(
            tensor.random.random_integers(0, 10, size=(10, 20), chunk_size=5),
            np.random.RandomState(0).random_integers(0, 10, size=(5, 5)))

    def testRandomSampleExecution(self):
        self._assertExecutedShape(
            tensor.random.random_sample(size=(10, 20), chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(
            tensor.random.random_sample(size=(10, 20), chunk_size=5),
            np.random.RandomState(0).random_sample(size=(5, 5)))

    def testRandomExecution(self):
        self._assertExecutedShape(
            tensor.random.random(size=(10, 20), chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(
            tensor.random.random(size=(10, 20), chunk_size=5),
            np.random.RandomState(0).random_sample(size=(5, 5)))

    def testRandfExecution(self):
        self._assertExecutedShape(
            tensor.random.ranf(size=(10, 20), chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(
            tensor.random.ranf(size=(10, 20), chunk_size=5),
            np.random.RandomState(0).random_sample(size=(5, 5)))

    def testSampleExecution(self):
        self._assertExecutedShape(
            tensor.random.sample(size=(10, 20), chunk_size=3), (10, 20))
        self._assertSeededChunksEqual(
            tensor.random.sample(size=(10, 20), chunk_size=5),
            np.random.RandomState(0).random_sample(size=(5, 5)))

    def testChoiceExecution(self):
        # integer population
        self._assertExecutedShape(tensor.random.choice(5, size=3, chunk_size=1), (3,))
        self._assertSeededChunksEqual(
            tensor.random.choice(5, size=(15,), chunk_size=5),
            np.random.RandomState(0).choice(5, size=(5,)))

        # explicit population
        self._assertExecutedShape(tensor.random.choice([1, 4, 9], size=3, chunk_size=1), (3,))
        self._assertSeededChunksEqual(
            tensor.random.choice([1, 4, 9], size=(15,), chunk_size=5),
            np.random.RandomState(0).choice([1, 4, 9], size=(5,)))

        # without replacement, asking for more samples than the population holds
        with self.assertRaises(ValueError):
            tensor.random.choice([1, 3, 4], size=5, replace=False, chunk_size=2)

        self._assertExecutedShape(
            tensor.random.choice([1, 4, 9], size=3, replace=False, chunk_size=1), (3,))
        self._assertSeededChunksEqual(
            tensor.random.choice([1, 4, 9], size=(3,), replace=False, chunk_size=1),
            np.random.RandomState(0).choice([1, 4, 9], size=(1,), replace=False))

        # explicit probabilities
        self._assertExecutedShape(
            tensor.random.choice([1, 4, 9], size=3, p=[.2, .5, .3], chunk_size=1), (3,))
        self._assertSeededChunksEqual(
            tensor.random.choice([1, 4, 9], size=(15,), chunk_size=5, p=[.2, .5, .3]),
            np.random.RandomState(0).choice([1, 4, 9], size=(5,), p=[.2, .5, .3]))

    def testSparseRandintExecution(self):
        size_executor = ExecutorForTest(sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(1, 2, size=(30, 50), density=.1, chunk_size=10, dtype='f4')
        size_res = size_executor.execute_tensor(arr, mock=True)
        self.assertAlmostEqual(arr.nbytes * 0.1, sum(tp[0] for tp in size_res))

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertTrue(issparse(res))
        self.assertEqual(res.shape, (30, 50))
        self.assertTrue(np.all(res.data >= 1))
        self.assertTrue(np.all(res.data < 2))
        self.assertAlmostEqual((res >= 1).toarray().sum(), 30 * 50 * .1, delta=20)

    # ------------------------------------------------------------------
    # distributions
    # ------------------------------------------------------------------

    def testBetaExecute(self):
        # scalar parameters: only the first chunk is seeded and compared
        arr = tensor.random.beta(1, 2, chunk_size=2).tiles()
        arr.chunks[0].op._seed = 0

        self.assertEqual(self.executor.execute_tensor(arr)[0], np.random.RandomState(0).beta(1, 2))

        # list parameters
        arr = tensor.random.beta([1, 2], [3, 4], chunk_size=2).tiles()
        arr.chunks[0].op._seed = 0

        np.testing.assert_array_equal(self.executor.execute_tensor(arr)[0],
                                      np.random.RandomState(0).beta([1, 2], [3, 4]))

        # broadcast parameters, one of them a tensor; with chunk_size=1 and
        # every chunk seeded with 0, each entry is compared with the first
        # draw of RandomState(0) for its own (a, b) pair
        arr = tensor.random.beta([[2, 3]], from_ndarray([[4, 6], [5, 2]], chunk_size=2),
                                 chunk_size=1, size=(3, 2, 2)).tiles()
        for c in arr.chunks:
            c.op._seed = 0

        res = self.executor.execute_tensor(arr, concat=True)[0]

        self.assertEqual(res[0, 0, 0], np.random.RandomState(0).beta(2, 4))
        self.assertEqual(res[0, 0, 1], np.random.RandomState(0).beta(3, 6))
        self.assertEqual(res[0, 1, 0], np.random.RandomState(0).beta(2, 5))
        self.assertEqual(res[0, 1, 1], np.random.RandomState(0).beta(3, 2))

        # an explicit RandomState(0) and the seeded module-level state agree
        arr = tensor.random.RandomState(0).beta([[3, 4]], [[1], [2]], chunk_size=1)
        tensor.random.seed(0)
        arr2 = tensor.random.beta([[3, 4]], [[1], [2]], chunk_size=1)

        np.testing.assert_array_equal(self.executor.execute_tensor(arr, concat=True)[0],
                                      self.executor.execute_tensor(arr2, concat=True)[0])

    def testBinomialExecute(self):
        self._assertSizedDistribution('binomial', 10, .5)

    def testChisquareExecute(self):
        self._assertSizedDistribution('chisquare', 2)

    def testDirichletExecute(self):
        self._assertSizedDistribution('dirichlet', (10, 5, 3), event_shape=(3,))

    def testExponentialExecute(self):
        self._assertSizedDistribution('exponential', 1.0)

    def testFExecute(self):
        self._assertSizedDistribution('f', 1.0, 2.0)

    def testGammaExecute(self):
        self._assertSizedDistribution('gamma', 1.0, 2.0)

    def testGeometricExecution(self):
        self._assertSizedDistribution('geometric', 1.0)

    def testGumbelExecution(self):
        self._assertSizedDistribution('gumbel', .5, 1.0)

    def testHypergeometricExecution(self):
        self._assertSizedDistribution('hypergeometric', 10, 20, 15)

    def testLaplaceExecution(self):
        self._assertSizedDistribution('laplace', .5, 1.0)

    def testLogisticExecution(self):
        self._assertSizedDistribution('logistic', .5, 1.0)

    def testLognormalExecution(self):
        self._assertSizedDistribution('lognormal', .5, 1.0)

    def testLogseriesExecution(self):
        self._assertSizedDistribution('logseries', .5)

    def testMultinomialExecution(self):
        # the shape is also checked before anything is executed
        arr = tensor.random.multinomial(10, [.2, .5, .3], 100, chunk_size=10)
        self.assertEqual(arr.shape, (100, 3))

        self._assertSizedDistribution('multinomial', 10, [.2, .5, .3], event_shape=(3,))

    def testMultivariateNormalExecution(self):
        self._assertSizedDistribution('multivariate_normal', [1, 2], [[1, 0], [0, 1]],
                                      event_shape=(2,))

    def testNegativeBinomialExecution(self):
        self._assertSizedDistribution('negative_binomial', 5, 1.0)

    def testNoncentralChisquareExecution(self):
        self._assertSizedDistribution('noncentral_chisquare', .5, 1.0)

    def testNoncentralFExecution(self):
        self._assertSizedDistribution('noncentral_f', 1.5, 1.0, 1.1)

    def testNormalExecute(self):
        self._assertSizedDistribution('normal', 10, 1.0)

    def testParetoExecute(self):
        self._assertSizedDistribution('pareto', 1.0)

    def testPoissonExecute(self):
        self._assertSizedDistribution('poisson', 1.0)

    def testPowerExecute(self):
        self._assertSizedDistribution('power', 1.0)

    def testRayleighExecute(self):
        self._assertSizedDistribution('rayleigh', 1.0)

    def testStandardCauchyExecute(self):
        self._assertSizedDistribution('standard_cauchy')

    def testStandardExponentialExecute(self):
        self._assertSizedDistribution('standard_exponential')

    def testStandardGammaExecute(self):
        self._assertSizedDistribution('standard_gamma', .1)

    def testStandardNormalExecute(self):
        self._assertSizedDistribution('standard_normal')

    def testStandardTExecute(self):
        self._assertSizedDistribution('standard_t', .1)

    def testTriangularExecute(self):
        self._assertSizedDistribution('triangular', .1, .2, .3)

    def testUniformExecute(self):
        self._assertSizedDistribution('uniform', .1, .2)

    def testVonmisesExecute(self):
        self._assertSizedDistribution('vonmises', .1, .2)

    def testWaldExecute(self):
        self._assertSizedDistribution('wald', .1, .2)

    def testWeibullExecute(self):
        self._assertSizedDistribution('weibull', .1)

    def testZipfExecute(self):
        self._assertSizedDistribution('zipf', 1.1)

    # ------------------------------------------------------------------
    # permutation
    # ------------------------------------------------------------------

    def testPermutationExecute(self):
        def leading_axis_in_order(res):
            return np.all(res[:-1] < res[1:])

        def columns_in_order(res):
            return np.all(res[:, :-1] < res[:, 1:])

        # integer argument: a shuffled arange(10)
        res = self._executeUntilShuffled(lambda: tensor.random.permutation(10),
                                         leading_axis_in_order)
        self.assertFalse(leading_axis_in_order(res))
        np.testing.assert_array_equal(np.sort(res), np.arange(10))

        # 1-d tensor argument
        arr = from_ndarray([1, 4, 9, 12, 15], chunk_size=2)
        res = self._executeUntilShuffled(lambda: tensor.random.permutation(arr),
                                         leading_axis_in_order)
        self.assertFalse(leading_axis_in_order(res))
        np.testing.assert_array_equal(np.sort(res), np.asarray([1, 4, 9, 12, 15]))

        arr = from_ndarray(np.arange(48).reshape(12, 4), chunk_size=2)
        # axis = 0
        res = self._executeUntilShuffled(lambda: tensor.random.permutation(arr),
                                         leading_axis_in_order)
        self.assertFalse(leading_axis_in_order(res))
        np.testing.assert_array_equal(np.sort(res, axis=0), np.arange(48).reshape(12, 4))
        # axis != 0
        res = self._executeUntilShuffled(lambda: tensor.random.permutation(arr, axis=1),
                                         columns_in_order)
        self.assertFalse(columns_in_order(res))
        np.testing.assert_array_equal(np.sort(res, axis=1), np.arange(48).reshape(12, 4))
Ejemplo n.º 21
0
class Test(TestBase):
    def setUp(self):
        """Run the base-class setup, then create the executor the tests use."""
        super().setUp()
        self.executor = ExecutorForTest()

    @require_cudf
    def testToGPUExecution(self):
        """A DataFrame and a Series sent through ``to_gpu`` execute to cudf objects."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        raw_df = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))

        # DataFrame, converted with the free function
        mars_df = from_pandas_df(raw_df, chunk_size=(13, 21))
        fetched = run(to_gpu(mars_df))
        self.assertIsInstance(fetched, cudf.DataFrame)
        pd.testing.assert_frame_equal(fetched.to_pandas(), raw_df)

        # Series, converted with the ``to_gpu`` method
        raw_series = raw_df.iloc[:, 0]
        mars_series = from_pandas_series(raw_series)
        fetched = run(mars_series.to_gpu())
        self.assertIsInstance(fetched, cudf.Series)
        pd.testing.assert_series_equal(fetched.to_pandas(), raw_series)

    @require_cudf
    def testToCPUExecution(self):
        """A ``to_gpu`` then ``to_cpu`` round trip returns the original pandas data."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        raw_df = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))

        # DataFrame round trip
        mars_df = from_pandas_df(raw_df, chunk_size=(13, 21))
        round_tripped = to_cpu(to_gpu(mars_df))
        fetched = run(round_tripped)
        self.assertIsInstance(fetched, pd.DataFrame)
        pd.testing.assert_frame_equal(fetched, raw_df)

        # Series round trip
        raw_series = raw_df.iloc[:, 0]
        # NOTE(review): a 2-tuple chunk_size is passed for a 1-d series -
        # confirm that this is intended.
        mars_series = from_pandas_series(raw_series, chunk_size=(13, 21))
        round_tripped = to_cpu(to_gpu(mars_series))
        fetched = run(round_tripped)
        self.assertIsInstance(fetched, pd.Series)
        pd.testing.assert_series_equal(fetched, raw_series)

    def testRechunkExecution(self):
        """Rechunking a DataFrame, Series or Index leaves the data unchanged."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        # DataFrame with default labels, rechunked to a (rows, columns) pair
        raw_df = pd.DataFrame(np.random.rand(8, 10))
        mars_df = from_pandas_df(pd.DataFrame(raw_df), chunk_size=3)
        pd.testing.assert_frame_equal(raw_df, run(mars_df.rechunk((3, 4))))

        # DataFrame with random integer index and random bytes column labels
        raw_df = pd.DataFrame(np.random.rand(10, 10),
                              index=np.random.randint(-100, 100, size=(10,)),
                              columns=[np.random.bytes(10) for _ in range(10)])
        mars_df = from_pandas_df(raw_df)
        pd.testing.assert_frame_equal(raw_df, run(mars_df.rechunk(5)))

        # Series rechunk execution
        raw_series = pd.Series(np.random.rand(10,))
        mars_series = from_pandas_series(raw_series)
        for size in (3, 1):
            rechunked = mars_series.rechunk(size)
            pd.testing.assert_series_equal(raw_series, run(rechunked))

        # Index rechunk execution
        raw_index = pd.Index(np.random.rand(10,))
        mars_index = from_pandas_index(raw_index)
        for size in (3, 1):
            rechunked = mars_index.rechunk(size)
            pd.testing.assert_index_equal(raw_index, run(rechunked))

    def testResetIndexExecution(self):
        """``reset_index`` on DataFrames and Series, compared against pandas.

        Covers plain and ``drop=True`` resets, a MultiIndex level reset (with
        and without MultiIndex columns), Series resets, and inputs whose shape
        is unknown because they come from adding misaligned operands.
        """
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        rows = [('bird',    389.0),
                ('bird',     24.0),
                ('mammal',   80.5),
                ('mammal', np.nan)]

        animals = pd.DataFrame(rows,
                               index=['falcon', 'parrot', 'lion', 'monkey'],
                               columns=('class', 'max_speed'))

        # plain reset: default chunking first, then two rows per chunk
        for chunk_kwargs in ({}, {'chunk_size': 2}):
            mars_df = from_pandas_df(animals, **chunk_kwargs)
            actual = run(df_reset_index(mars_df))
            wanted = animals.reset_index()
            pd.testing.assert_frame_equal(actual, wanted)

        # drop the old index, one row per chunk
        mars_df = from_pandas_df(animals, chunk_size=1)
        actual = run(df_reset_index(mars_df, drop=True))
        wanted = animals.reset_index(drop=True)
        pd.testing.assert_frame_equal(actual, wanted)

        # reset a single level of a MultiIndex
        multi_idx = pd.MultiIndex.from_tuples([('bird', 'falcon'),
                                               ('bird', 'parrot'),
                                               ('mammal', 'lion'),
                                               ('mammal', 'monkey')],
                                              names=['class', 'name'])
        animals = pd.DataFrame(rows, index=multi_idx, columns=('type', 'max_speed'))
        mars_df = from_pandas_df(animals, chunk_size=1)
        actual = run(df_reset_index(mars_df, level='class'))
        wanted = animals.reset_index(level='class')
        pd.testing.assert_frame_equal(actual, wanted)

        # same level reset, now with MultiIndex columns
        animals.columns = pd.MultiIndex.from_tuples([('speed', 'max'), ('species', 'type')])
        mars_df = from_pandas_df(animals, chunk_size=2)
        actual = run(df_reset_index(mars_df, level='class', col_level=1, col_fill='species'))
        wanted = animals.reset_index(level='class', col_level=1, col_fill='species')
        pd.testing.assert_frame_equal(actual, wanted)

        # Series: a named reset gives a frame, a dropping reset gives a series
        raw_series = pd.Series([1, 2, 3, 4], name='foo',
                               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

        mars_series = from_pandas_series(raw_series)
        actual = run(series_reset_index(mars_series, name='bar'))
        wanted = raw_series.reset_index(name='bar')
        pd.testing.assert_frame_equal(actual, wanted)

        mars_series = from_pandas_series(raw_series, chunk_size=2)
        actual = run(series_reset_index(mars_series, drop=True))
        wanted = raw_series.reset_index(drop=True)
        pd.testing.assert_series_equal(actual, wanted)

        # Unknown shape: the operands have different indexes and chunk sizes
        session = new_session()

        left_raw = pd.DataFrame(np.random.rand(10, 3), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        left = from_pandas_df(left_raw, chunk_size=5)
        right_raw = pd.DataFrame(np.random.rand(10, 3), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        right = from_pandas_df(right_raw, chunk_size=6)
        actual = session.run((left + right).reset_index())
        pd.testing.assert_index_equal(actual.index, pd.RangeIndex(12))
        # Inconsistent with Pandas when input dataframe's shape is unknown,
        # so sort by the first column before comparing values.
        actual = actual.sort_values(by=actual.columns[0])
        wanted = (left_raw + right_raw).reset_index()
        np.testing.assert_array_equal(actual.to_numpy(), wanted.to_numpy())

        left_raw = pd.Series(np.random.rand(10,), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9])
        left = from_pandas_series(left_raw, chunk_size=3)
        right_raw = pd.Series(np.random.rand(10,), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
        right = from_pandas_series(right_raw, chunk_size=3)
        actual = session.run((left + right).reset_index())
        pd.testing.assert_index_equal(actual.index, pd.RangeIndex(12))
        # Inconsistent with Pandas when input dataframe's shape is unknown,
        # so sort by the first column before comparing values.
        actual = actual.sort_values(by=actual.columns[0])
        wanted = (left_raw + right_raw).reset_index()
        np.testing.assert_array_equal(actual.to_numpy(), wanted.to_numpy())

    def testSeriesMapExecution(self):
        """``Series.map`` with dict, callable and Series arguments, against pandas."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        numbers = pd.Series(np.arange(10))
        mars_numbers = from_pandas_series(numbers, chunk_size=7)

        # cannot infer dtype, the inferred is int,
        # but actually it is float
        # just due to nan
        with self.assertRaises(ValueError):
            mars_numbers.map({5: 10})

        # dict mappings with an explicit dtype
        mapped = mars_numbers.map({5: 10}, dtype=float)
        actual = run(mapped)
        wanted = numbers.map({5: 10})
        pd.testing.assert_series_equal(actual, wanted)

        mapped = mars_numbers.map({i: 10 + i for i in range(7)}, dtype=float)
        actual = run(mapped)
        wanted = numbers.map({i: 10 + i for i in range(7)})
        pd.testing.assert_series_equal(actual, wanted)

        mapped = mars_numbers.map({5: 10}, dtype=float, na_action='ignore')
        actual = run(mapped)
        wanted = numbers.map({5: 10}, na_action='ignore')
        pd.testing.assert_series_equal(actual, wanted)

        # dtype can be inferred
        mapped = mars_numbers.map({5: 10.})
        actual = run(mapped)
        wanted = numbers.map({5: 10.})
        pd.testing.assert_series_equal(actual, wanted)

        # callable with an explicit dtype
        mapped = mars_numbers.map(lambda x: x + 1, dtype=int)
        actual = run(mapped)
        wanted = numbers.map(lambda x: x + 1)
        pd.testing.assert_series_equal(actual, wanted)

        def add_one(x: int) -> float:
            return x + 1.

        # dtype can be inferred for function
        mapped = mars_numbers.map(add_one)
        actual = run(mapped)
        wanted = numbers.map(lambda x: x + 1.)
        pd.testing.assert_series_equal(actual, wanted)

        # test arg is a md.Series
        lookup = pd.Series([10], index=[5])
        mars_lookup = from_pandas_series(lookup)

        mapped = mars_numbers.map(mars_lookup, dtype=float)
        actual = run(mapped)
        wanted = numbers.map(lookup)
        pd.testing.assert_series_equal(actual, wanted)

        # test arg is a md.Series, and dtype can be inferred
        lookup = pd.Series([10.], index=[5])
        mars_lookup = from_pandas_series(lookup)

        mapped = mars_numbers.map(mars_lookup)
        actual = run(mapped)
        wanted = numbers.map(lookup)
        pd.testing.assert_series_equal(actual, wanted)

        # test str
        letters = pd.Series(['a', 'b', 'c', 'd'])
        mars_letters = from_pandas_series(letters, chunk_size=2)

        mapped = mars_letters.map({'c': 'e'})
        actual = run(mapped)
        wanted = letters.map({'c': 'e'})
        pd.testing.assert_series_equal(actual, wanted)

    def testDescribeExecution(self):
        """Compare ``describe`` on Series and DataFrames with pandas.

        Both single-chunk and multi-chunk inputs are covered, with default
        percentiles, with ``percentiles=[]`` and with ``include=np.float64``.
        An out-of-range percentile must raise ``ValueError``.
        """
        s_raw = pd.Series(np.random.rand(10))

        # test one chunk
        series = from_pandas_series(s_raw, chunk_size=10)

        r = series.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe()
        pd.testing.assert_series_equal(result, expected)

        r = series.describe(percentiles=[])
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[])
        pd.testing.assert_series_equal(result, expected)

        # test multi chunks
        series = from_pandas_series(s_raw, chunk_size=3)

        r = series.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe()
        pd.testing.assert_series_equal(result, expected)

        r = series.describe(percentiles=[])
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[])
        pd.testing.assert_series_equal(result, expected)

        # four float columns plus one integer column, so that
        # ``include=np.float64`` actually filters something out
        df_raw = pd.DataFrame(np.random.rand(10, 4), columns=list('abcd'))
        df_raw['e'] = np.random.randint(100, size=10)

        # test one chunk
        df = from_pandas_df(df_raw, chunk_size=10)

        r = df.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe()
        pd.testing.assert_frame_equal(result, expected)

        # Single-chunk DataFrame with ``include``: this check was previously
        # missing because the block below was run on the Series instead.
        r = df.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_frame_equal(result, expected)

        # Series (multi-chunk at this point) called with ``include``
        r = series.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = s_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_series_equal(result, expected)

        # test multi chunks
        df = from_pandas_df(df_raw, chunk_size=3)

        r = df.describe()
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe()
        pd.testing.assert_frame_equal(result, expected)

        r = df.describe(percentiles=[], include=np.float64)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = df_raw.describe(percentiles=[], include=np.float64)
        pd.testing.assert_frame_equal(result, expected)

        # percentiles outside [0, 1] are rejected when the call is made
        with self.assertRaises(ValueError):
            df.describe(percentiles=[1.1])

    def testDataFrameFillNAExecution(self):
        """``DataFrame.fillna`` / ``ffill`` / ``bfill`` compared against pandas.

        The input is an all-NaN 20x10 frame with up to 20 randomly placed
        values. Fill values are a scalar, a DataFrame, a Series, or a
        directional fill along either axis.
        """
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        sparse_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
        for _ in range(20):
            sparse_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)

        filler_df_raw = pd.DataFrame(np.random.randint(0, 100, (10, 7)).astype(np.float32),
                                     columns=list('ABCDEFG'))

        # test DataFrame single chunk with numeric fill
        sparse = from_pandas_df(sparse_raw)
        actual = run(sparse.fillna(1))
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_frame_equal(actual, wanted)

        # test DataFrame single chunk with value as single chunk
        sparse = from_pandas_df(sparse_raw)
        filler_df = from_pandas_df(filler_df_raw)
        actual = run(sparse.fillna(filler_df))
        wanted = sparse_raw.fillna(filler_df_raw)
        pd.testing.assert_frame_equal(actual, wanted)

        # test chunked with numeric fill
        sparse = from_pandas_df(sparse_raw, chunk_size=3)
        actual = run(sparse.fillna(1))
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_frame_equal(actual, wanted)

        # test inplace tile
        sparse = from_pandas_df(sparse_raw, chunk_size=3)
        sparse.fillna(1, inplace=True)
        actual = run(sparse)
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_frame_equal(actual, wanted)

        # directional fills without limit, in this order:
        # forward on axis=0, backward on axis=0, forward on axis=1,
        # backward on axis=1
        directional_fills = (
            lambda frame: frame.fillna(method='pad'),
            lambda frame: frame.fillna(method='backfill'),
            lambda frame: frame.ffill(axis=1),
            lambda frame: frame.bfill(axis=1),
        )
        for fill in directional_fills:
            sparse = from_pandas_df(sparse_raw, chunk_size=3)
            actual = run(fill(sparse))
            wanted = fill(sparse_raw)
            pd.testing.assert_frame_equal(actual, wanted)

        # test fill with dataframe
        sparse = from_pandas_df(sparse_raw, chunk_size=3)
        filler_df = from_pandas_df(filler_df_raw, chunk_size=4)
        actual = run(sparse.fillna(filler_df))
        wanted = sparse_raw.fillna(filler_df_raw)
        pd.testing.assert_frame_equal(actual, wanted)

        # test fill with series
        filler_series_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32),
                                      index=list('ABCDEFGHIJ'))
        sparse = from_pandas_df(sparse_raw, chunk_size=3)
        filler_series = from_pandas_series(filler_series_raw, chunk_size=4)
        actual = run(sparse.fillna(filler_series))
        wanted = sparse_raw.fillna(filler_series_raw)
        pd.testing.assert_frame_equal(actual, wanted)

    def testSeriesFillNAExecution(self):
        """``Series.fillna`` compared against pandas.

        The input is an all-NaN series of length 20 with up to 3 randomly
        placed values. Fill values are a scalar, another Series, or a
        forward/backward fill.
        """
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        sparse_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            sparse_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)
        filler_raw = pd.Series(np.random.randint(0, 100, (10,)).astype(np.float32))

        # default chunking with numeric fill
        sparse = from_pandas_series(sparse_raw)
        actual = run(sparse.fillna(1))
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_series_equal(actual, wanted)

        # default chunking for both the series and the fill value
        sparse = from_pandas_series(sparse_raw)
        filler = from_pandas_series(filler_raw)
        actual = run(sparse.fillna(filler))
        wanted = sparse_raw.fillna(filler_raw)
        pd.testing.assert_series_equal(actual, wanted)

        # test chunked with numeric fill
        sparse = from_pandas_series(sparse_raw, chunk_size=3)
        actual = run(sparse.fillna(1))
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_series_equal(actual, wanted)

        # test inplace tile
        sparse = from_pandas_series(sparse_raw, chunk_size=3)
        sparse.fillna(1, inplace=True)
        actual = run(sparse)
        wanted = sparse_raw.fillna(1)
        pd.testing.assert_series_equal(actual, wanted)

        # forward fill, then backward fill, both without limit
        for method in ('pad', 'backfill'):
            sparse = from_pandas_series(sparse_raw, chunk_size=3)
            actual = run(sparse.fillna(method=method))
            wanted = sparse_raw.fillna(method=method)
            pd.testing.assert_series_equal(actual, wanted)

        # test fill with series
        sparse = from_pandas_series(sparse_raw, chunk_size=3)
        filler = from_pandas_series(filler_raw, chunk_size=4)
        actual = run(sparse.fillna(filler))
        wanted = sparse_raw.fillna(filler_raw)
        pd.testing.assert_series_equal(actual, wanted)

    def testDataFrameApplyExecute(self):
        """``DataFrame.apply`` compared against pandas for a table of call forms.

        Each case is a callable that performs the same ``apply`` on whichever
        frame it is given, paired with the pandas assertion matching the kind
        of result (frame or series). ``options.chunk_store_limit`` is set to
        20 for the duration of the test and restored afterwards.
        """
        column_names = [chr(ord('A') + i) for i in range(10)]
        df_raw = pd.DataFrame({c: [i ** 2 for i in range(20)] for c in column_names})

        frames_equal = pd.testing.assert_frame_equal
        series_equal = pd.testing.assert_series_equal

        cases = [
            (lambda d: d.apply('ffill'), frames_equal),
            (lambda d: d.apply(['sum', 'max']), frames_equal),
            (lambda d: d.apply(np.sqrt), frames_equal),
            (lambda d: d.apply(lambda x: pd.Series([1, 2])), frames_equal),
            (lambda d: d.apply(np.sum, axis='index'), series_equal),
            (lambda d: d.apply(np.sum, axis='columns'), series_equal),
            (lambda d: d.apply(lambda x: [1, 2], axis=1), series_equal),
            (lambda d: d.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1),
             frames_equal),
            (lambda d: d.apply(lambda x: [1, 2], axis=1, result_type='expand'),
             frames_equal),
            (lambda d: d.apply(lambda x: list(range(10)), axis=1, result_type='reduce'),
             series_equal),
            (lambda d: d.apply(lambda x: list(range(10)), axis=1, result_type='broadcast'),
             frames_equal),
        ]

        saved_limit = options.chunk_store_limit
        try:
            options.chunk_store_limit = 20

            df = from_pandas_df(df_raw, chunk_size=5)

            for do_apply, check in cases:
                actual = self.executor.execute_dataframe(do_apply(df), concat=True)[0]
                wanted = do_apply(df_raw)
                check(actual, wanted)
        finally:
            # always put the global option back, even when an assertion fails
            options.chunk_store_limit = saved_limit

    def testSeriesApplyExecute(self):
        """``Series.apply`` compared against pandas for several argument forms.

        Each case is a callable that performs the same ``apply`` on whichever
        series it is given; every result is compared as a series.
        """
        labels = [chr(ord('A') + i) for i in range(20)]
        s_raw = pd.Series([i ** 2 for i in range(20)], index=labels)

        series = from_pandas_series(s_raw, chunk_size=5)

        cases = (
            lambda s: s.apply('add', args=(1,)),
            lambda s: s.apply(['sum', 'max']),
            lambda s: s.apply(np.sqrt),
            lambda s: s.apply('sqrt'),
            lambda s: s.apply(lambda x: [x, x + 1], convert_dtype=False),
        )
        for do_apply in cases:
            actual = self.executor.execute_dataframe(do_apply(series), concat=True)[0]
            wanted = do_apply(s_raw)
            pd.testing.assert_series_equal(actual, wanted)

    def testTransformExecute(self):
        """``transform`` on DataFrames and Series compared against pandas.

        Calls made with ``_call_agg=True`` are compared with pandas ``agg``
        instead of ``transform``. ``options.chunk_store_limit`` is set to 20
        for the duration of the test and restored afterwards.
        """
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        def rename_fn(f, new_name):
            # Give a lambda an explicit ``__name__``.
            f.__name__ = new_name
            return f

        column_names = [chr(ord('A') + i) for i in range(10)]
        df_raw = pd.DataFrame({c: [i ** 2 for i in range(20)] for c in column_names})

        labels = [chr(ord('A') + i) for i in range(20)]
        s_raw = pd.Series([i ** 2 for i in range(20)], index=labels)

        saved_limit = options.chunk_store_limit
        try:
            options.chunk_store_limit = 20

            # DATAFRAME CASES
            df = from_pandas_df(df_raw, chunk_size=5)

            # test transform scenarios on data frames
            actual = run(df.transform(lambda x: list(range(len(x)))))
            wanted = df_raw.transform(lambda x: list(range(len(x))))
            pd.testing.assert_frame_equal(actual, wanted)

            actual = run(df.transform(lambda x: list(range(len(x))), axis=1))
            wanted = df_raw.transform(lambda x: list(range(len(x))), axis=1)
            pd.testing.assert_frame_equal(actual, wanted)

            actual = run(df.transform(['cumsum', 'cummax', lambda x: x + 1]))
            wanted = df_raw.transform(['cumsum', 'cummax', lambda x: x + 1])
            pd.testing.assert_frame_equal(actual, wanted)

            per_column = OrderedDict([
                ('A', 'cumsum'),
                ('D', ['cumsum', 'cummax']),
                ('F', lambda x: x + 1),
            ])
            actual = run(df.transform(per_column))
            wanted = df_raw.transform(per_column)
            pd.testing.assert_frame_equal(actual, wanted)

            # test agg scenarios on data frames
            actual = run(df.transform(lambda x: x.iloc[:-1], _call_agg=True))
            wanted = df_raw.agg(lambda x: x.iloc[:-1])
            pd.testing.assert_frame_equal(actual, wanted)

            actual = run(df.transform(lambda x: x.iloc[:-1], axis=1, _call_agg=True))
            wanted = df_raw.agg(lambda x: x.iloc[:-1], axis=1)
            pd.testing.assert_frame_equal(actual, wanted)

            func_list = [rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                         lambda x: x.iloc[:-1].reset_index(drop=True)]
            actual = run(df.transform(func_list, _call_agg=True))
            wanted = df_raw.agg(func_list)
            pd.testing.assert_frame_equal(actual, wanted)

            # a reducing function gives a series rather than a frame
            actual = run(df.transform(lambda x: x.sum(), _call_agg=True))
            wanted = df_raw.agg(lambda x: x.sum())
            pd.testing.assert_series_equal(actual, wanted)

            per_column = OrderedDict([
                ('A', rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1')),
                ('D', [rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), 'f1'),
                       lambda x: x.iloc[:-1].reset_index(drop=True)]),
                ('F', lambda x: x.iloc[:-1].reset_index(drop=True)),
            ])
            actual = run(df.transform(per_column, _call_agg=True))
            wanted = df_raw.agg(per_column)
            pd.testing.assert_frame_equal(actual, wanted)

            # SERIES CASES
            series = from_pandas_series(s_raw, chunk_size=5)

            # test transform scenarios on series
            actual = run(series.transform(lambda x: x + 1))
            wanted = s_raw.transform(lambda x: x + 1)
            pd.testing.assert_series_equal(actual, wanted)

            # a list of functions on a series gives a frame
            actual = run(series.transform(['cumsum', lambda x: x + 1]))
            wanted = s_raw.transform(['cumsum', lambda x: x + 1])
            pd.testing.assert_frame_equal(actual, wanted)
        finally:
            # always put the global option back, even when an assertion fails
            options.chunk_store_limit = saved_limit

    def testStringMethodExecution(self):
        """``Series.str`` accessor methods compared against pandas."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        words = pd.Series(['s1,s2', 'ef,', 'dd', np.nan])
        tripled = pd.concat([words, words, words])

        mars_words = from_pandas_series(words, chunk_size=2)
        mars_tripled = from_pandas_series(tripled, chunk_size=2)

        # test getitem
        actual = run(mars_words.str[:3])
        wanted = words.str[:3]
        pd.testing.assert_series_equal(actual, wanted)

        # test split, expand=False
        actual = run(mars_words.str.split(',', n=2))
        wanted = words.str.split(',', n=2)
        pd.testing.assert_series_equal(actual, wanted)

        # test split, expand=True
        actual = run(mars_words.str.split(',', expand=True, n=1))
        wanted = words.str.split(',', expand=True, n=1)
        pd.testing.assert_frame_equal(actual, wanted)

        # test rsplit
        actual = run(mars_words.str.rsplit(',', expand=True, n=1))
        wanted = words.str.rsplit(',', expand=True, n=1)
        pd.testing.assert_frame_equal(actual, wanted)

        # test cat all data: compared with plain equality, not a pandas assertion
        actual = run(mars_tripled.str.cat(sep='/', na_rep='e'))
        wanted = tripled.str.cat(sep='/', na_rep='e')
        self.assertEqual(actual, wanted)

        # test cat list
        actual = run(mars_words.str.cat(['a', 'b', np.nan, 'c']))
        wanted = words.str.cat(['a', 'b', np.nan, 'c'])
        pd.testing.assert_series_equal(actual, wanted)

        # test cat series
        actual = run(mars_words.str.cat(mars_words.str.capitalize(), join='outer'))
        wanted = words.str.cat(words.str.capitalize(), join='outer')
        pd.testing.assert_series_equal(actual, wanted)

        # test extractall
        actual = run(mars_words.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)"))
        wanted = words.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
        pd.testing.assert_frame_equal(actual, wanted)

        # test extract, expand=False
        actual = run(mars_words.str.extract(r'[ab](\d)', expand=False))
        wanted = words.str.extract(r'[ab](\d)', expand=False)
        pd.testing.assert_series_equal(actual, wanted)

        # test extract, expand=True
        actual = run(mars_words.str.extract(r'[ab](\d)', expand=True))
        wanted = words.str.extract(r'[ab](\d)', expand=True)
        pd.testing.assert_frame_equal(actual, wanted)

    def testDatetimeMethodExecution(self):
        """``Series.dt`` accessor on datetime and timedelta data, against pandas."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        # test datetime (the last entry is missing)
        stamps = pd.Series([pd.Timestamp('2020-1-1'),
                            pd.Timestamp('2020-2-1'),
                            np.nan])
        mars_stamps = from_pandas_series(stamps, chunk_size=2)

        actual = run(mars_stamps.dt.year)
        wanted = stamps.dt.year
        pd.testing.assert_series_equal(actual, wanted)

        actual = run(mars_stamps.dt.strftime('%m-%d-%Y'))
        wanted = stamps.dt.strftime('%m-%d-%Y')
        pd.testing.assert_series_equal(actual, wanted)

        # test timedelta (the last entry is missing)
        spans = pd.Series([pd.Timedelta('1 days'),
                           pd.Timedelta('3 days'),
                           np.nan])
        mars_spans = from_pandas_series(spans, chunk_size=2)

        actual = run(mars_spans.dt.days)
        wanted = spans.dt.days
        pd.testing.assert_series_equal(actual, wanted)

    def testSeriesIsin(self):
        """``Series.isin`` with Series, pandas, tensor and set candidates."""
        def run(obj):
            # Execute and return the concatenated result.
            return self.executor.execute_dataframe(obj, concat=True)[0]

        # Both sides as chunked series, with (series, candidates) chunk sizes:
        # one chunk in multiple chunks, multiple chunks in one chunk,
        # multiple chunks in multiple chunks.
        for series_chunk, candidates_chunk in ((10, 2), (2, 4), (2, 2)):
            a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
            b = pd.Series([2, 1, 9, 3])
            sa = from_pandas_series(a, chunk_size=series_chunk)
            sb = from_pandas_series(b, chunk_size=candidates_chunk)

            actual = run(sa.isin(sb))
            wanted = a.isin(b)
            pd.testing.assert_series_equal(actual, wanted)

        # candidates given as a raw pandas Series
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = pd.Series([2, 1, 9, 3])
        sa = from_pandas_series(a, chunk_size=2)

        actual = run(sa.isin(b))
        wanted = a.isin(b)
        pd.testing.assert_series_equal(actual, wanted)

        # candidates given as a chunked tensor
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = np.array([2, 1, 9, 3])
        sa = from_pandas_series(a, chunk_size=2)
        sb = tensor(b, chunk_size=3)

        actual = run(sa.isin(sb))
        wanted = a.isin(b)
        pd.testing.assert_series_equal(actual, wanted)

        # candidates given as a Python set
        a = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        b = {2, 1, 9, 3}  # set
        sa = from_pandas_series(a, chunk_size=2)

        actual = run(sa.isin(b))
        wanted = a.isin(b)
        pd.testing.assert_series_equal(actual, wanted)

    def testCheckNA(self):
        """Check ``isna`` / ``notna`` on sparsely filled frames and series."""

        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # all-NaN frame with up to 20 randomly chosen cells filled in
        df_raw = pd.DataFrame(np.nan, index=range(20), columns=list('ABCDEFGHIJ'))
        for _ in range(20):
            value = random.randint(0, 99)
            row_pos = random.randint(0, 19)
            col_pos = random.randint(0, 9)
            df_raw.iloc[row_pos, col_pos] = value

        df = from_pandas_df(df_raw, chunk_size=4)

        for method in ('isna', 'notna'):
            actual = run(getattr(df, method)())
            pd.testing.assert_frame_equal(actual, getattr(df_raw, method)())

        # all-NaN series with up to 3 randomly chosen entries filled in
        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(3):
            value = random.randint(0, 99)
            series_raw.iloc[random.randint(0, 19)] = value

        series = from_pandas_series(series_raw, chunk_size=4)

        for method in ('isna', 'notna'):
            actual = run(getattr(series, method)())
            pd.testing.assert_series_equal(actual, getattr(series_raw, method)())

    def testDropNA(self):
        """Check ``dropna`` on chunked frames and series against pandas."""

        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # dataframe cases: start from an all-NaN frame and fill random cells
        df_raw = pd.DataFrame(np.nan, index=range(20), columns=list('ABCDEFGHIJ'))
        for _ in range(30):
            value = random.randint(0, 99)
            row_pos = random.randint(0, 19)
            col_pos = random.randint(0, 9)
            df_raw.iloc[row_pos, col_pos] = value
        # fill every column of a few randomly chosen rows
        for _ in range(random.randint(1, 5)):
            row = random.randint(0, 19)
            for col in range(10):
                df_raw.iloc[row, col] = random.randint(0, 99)

        # only one chunk in columns, can run dropna directly
        r = from_pandas_df(df_raw, chunk_size=(4, 10)).dropna()
        pd.testing.assert_frame_equal(run(r), df_raw.dropna())

        # multiple chunks in columns, count() will be called first
        variants = [
            lambda obj: obj.dropna(),
            lambda obj: obj.dropna(how='all'),
            lambda obj: obj.dropna(subset=list('ABFI')),
            lambda obj: obj.dropna(how='all', subset=list('BDHJ')),
        ]
        for drop in variants:
            r = drop(from_pandas_df(df_raw, chunk_size=4))
            pd.testing.assert_frame_equal(run(r), drop(df_raw))

        # in-place variant on a frame
        r = from_pandas_df(df_raw, chunk_size=4)
        r.dropna(how='all', inplace=True)
        pd.testing.assert_frame_equal(run(r), df_raw.dropna(how='all'))

        # series cases
        series_raw = pd.Series(np.nan, index=range(20))
        for _ in range(10):
            value = random.randint(0, 99)
            series_raw.iloc[random.randint(0, 19)] = value

        r = from_pandas_series(series_raw, chunk_size=4).dropna()
        pd.testing.assert_series_equal(run(r), series_raw.dropna())

        # in-place variant on a series
        r = from_pandas_series(series_raw, chunk_size=4)
        r.dropna(inplace=True)
        pd.testing.assert_series_equal(run(r), series_raw.dropna())

    def testCutExecution(self):
        """Verify ``cut`` against ``pd.cut`` for series, tensor and raw inputs.

        Covers explicit bin edges, ``retbins``, labels given as a list, as a
        tensor or as ``False``, interval-index bins, duplicated bin edges and
        integer bin counts (including the degenerate and infinite-data cases).
        """
        rs = np.random.RandomState(0)
        raw = rs.random(15) * 1000
        s = pd.Series(raw, index=['i{}'.format(i) for i in range(15)])
        bins = [10, 100, 500]
        ii = pd.interval_range(10, 500, 3)
        labels = ['a', 'b']

        t = tensor(raw, chunk_size=4)
        series = from_pandas_series(s, chunk_size=4)
        iii = from_pandas_index(ii, chunk_size=2)

        # cut on Series
        r = cut(series, bins)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins))

        r, b = cut(series, bins, retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        # cut on tensor
        r = cut(t, bins)
        # result and expected are categorical arrays, compare element-wise
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        # raw pandas series as input, bins given as a tensor
        r = cut(s, tensor(bins, chunk_size=2), right=False, include_lowest=True)
        result = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(result, pd.cut(s, bins, right=False, include_lowest=True))

        # test labels
        r = cut(t, bins, labels=labels)
        # result and expected are categorical arrays, compare element-wise
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        # test labels=False: the outcome is executed as a tensor and compared
        # as a plain array
        r = cut(t, bins, labels=False)
        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=False)
        np.testing.assert_array_equal(result, expected)

        # test labels which is tensor
        labels_t = tensor(['a', 'b'], chunk_size=1)
        r = cut(raw, bins, labels=labels_t, include_lowest=True)
        # result and expected are categorical arrays, compare element-wise
        result = self.executor.execute_dataframe(r, concat=True)[0]
        expected = pd.cut(raw, bins, labels=labels, include_lowest=True)
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            np.testing.assert_equal(actual, wanted)

        # test bins which is pd.IntervalIndex, combined with labels=False
        r, b = cut(raw, ii, labels=False, retbins=True)
        r_result = self.executor.execute_tileable(r, concat=True)[0]
        b_result = self.executor.execute_tileable(b, concat=True)[0]
        r_expected, b_expected = pd.cut(raw, ii, labels=False, retbins=True)
        for actual, wanted in zip(r_result, r_expected):
            np.testing.assert_equal(actual, wanted)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test bins which is md.IntervalIndex
        r, b = cut(series, iii, labels=tensor(labels, chunk_size=1), retbins=True)
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_dataframe(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, ii, labels=labels, retbins=True)
        pd.testing.assert_series_equal(r_result, r_expected)
        pd.testing.assert_index_equal(b_result, b_expected)

        # test duplicates
        bins2 = [0, 2, 4, 6, 10, 10]
        r, b = cut(s, bins2, labels=False, retbins=True,
                   right=False, duplicates='drop')
        r_result = self.executor.execute_dataframe(r, concat=True)[0]
        b_result = self.executor.execute_tensor(b, concat=True)[0]
        r_expected, b_expected = pd.cut(s, bins2, labels=False, retbins=True,
                                        right=False, duplicates='drop')
        pd.testing.assert_series_equal(r_result, r_expected)
        np.testing.assert_array_equal(b_result, b_expected)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            # test integer bins
            r = cut(series, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s, 3))

            r, b = cut(series, 3, right=False, retbins=True)
            r_result, b_result = executor.execute_dataframes([r, b])
            r_expected, b_expected = pd.cut(s, 3, right=False, retbins=True)
            pd.testing.assert_series_equal(r_result, r_expected)
            np.testing.assert_array_equal(b_result, b_expected)

            # test min max same
            s2 = pd.Series([1.1] * 15)
            r = cut(s2, 3)
            result = executor.execute_dataframes([r])[0]
            pd.testing.assert_series_equal(result, pd.cut(s2, 3))

            # test inf exist
            s3 = s2.copy()
            # Positional assignment: on the default integer index ``s3[-1]``
            # is a label-based set that appends a new row labelled -1 instead
            # of overwriting the last element.
            s3.iloc[-1] = np.inf
            with self.assertRaises(ValueError):
                executor.execute_dataframes([cut(s3, 3)])

    def testShiftExecution(self):
        """Check ``shift`` / ``tshift`` on chunked frames and series."""
        fill_values = (None, 0, 1.)
        frame_message = 'Failed when periods: {}, axis: {}, fill_value: {}'
        series_message = 'Failed when periods: {}, fill_value: {}'

        def verify(md_obj, pd_obj, assert_equal, message, message_args, **shift_kw):
            # Shift both objects with identical keyword arguments; on a
            # mismatch re-raise with the parameter combination in the message.
            r = md_obj.shift(**shift_kw)
            try:
                result = self.executor.execute_dataframe(r, concat=True)[0]
                expected = pd_obj.shift(**shift_kw)
                assert_equal(result, expected)
            except AssertionError as e:  # pragma: no cover
                raise AssertionError(message.format(*message_args)) from e

        # test dataframe
        rs = np.random.RandomState(0)
        column_names = ['col' + str(i + 1) for i in range(8)]
        raw = pd.DataFrame(rs.randint(1000, size=(10, 8)),
                           columns=column_names)

        df = from_pandas_df(raw, chunk_size=5)

        for periods in (2, -2, 6, -6):
            for axis in (0, 1):
                for fill_value in fill_values:
                    verify(df, raw, pd.testing.assert_frame_equal,
                           frame_message, (periods, axis, fill_value),
                           periods=periods, axis=axis, fill_value=fill_value)

        # same data, but with datetime row and column labels
        raw2 = raw.copy()
        raw2.index = pd.date_range('2020-1-1', periods=10)
        raw2.columns = pd.date_range('2020-3-1', periods=8)

        df2 = from_pandas_df(raw2, chunk_size=5)

        # test freq not None
        for periods in (2, -2):
            for axis in (0, 1):
                for fill_value in fill_values:
                    verify(df2, raw2, pd.testing.assert_frame_equal,
                           frame_message, (periods, axis, fill_value),
                           periods=periods, freq='D', axis=axis,
                           fill_value=fill_value)

        # test tshift
        shifted = df2.tshift(periods=1)
        result = self.executor.execute_dataframe(shifted, concat=True)[0]
        pd.testing.assert_frame_equal(result, raw2.tshift(periods=1))

        # tshift on the frame with the default (non-datetime) index must fail
        with self.assertRaises(ValueError):
            _ = df.tshift(periods=1)

        # test series
        s = raw.iloc[:, 0]

        series = from_pandas_series(s, chunk_size=5)
        for periods in (0, 2, -2, 6, -6):
            for fill_value in fill_values:
                verify(series, s, pd.testing.assert_series_equal,
                       series_message, (periods, fill_value),
                       periods=periods, fill_value=fill_value)

        s2 = raw2.iloc[:, 0]

        # test freq not None
        series2 = from_pandas_series(s2, chunk_size=5)
        for periods in (2, -2):
            for fill_value in fill_values:
                verify(series2, s2, pd.testing.assert_series_equal,
                       series_message, (periods, fill_value),
                       periods=periods, freq='D', fill_value=fill_value)
Ejemplo n.º 22
0
class Test(unittest.TestCase):
    """Execution tests comparing tensor special functions with scipy."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _check_unary(self, tensor_func, scipy_func):
        """Run a unary special function on dense and sparse input.

        The dense result is compared with ``scipy_func`` applied to the raw
        array; the sparse result is compared with a CSR matrix rebuilt from
        ``scipy_func`` applied to the stored values only.
        """
        # dense input
        dense = np.random.rand(10, 8, 6)
        dense_t = tensor(dense, chunk_size=3)

        op = tensor_func(dense_t)

        actual = self.executor.execute_tensor(op, concat=True)[0]
        wanted = scipy_func(dense)

        np.testing.assert_array_equal(actual, wanted)

        # test sparse
        sparse = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        sparse_t = tensor(sparse, chunk_size=3)

        op = tensor_func(sparse_t)

        actual = self.executor.execute_tensor(op, concat=True)[0]

        values = scipy_func(sparse.data)
        wanted = sps.csr_matrix((values, sparse.indices, sparse.indptr),
                                sparse.shape)

        np.testing.assert_array_equal(actual.toarray(), wanted.toarray())

    def testGammalnExecution(self):
        self._check_unary(gammaln, scipy_gammaln)

    def testErfExecution(self):
        self._check_unary(erf, scipy_erf)

    def testEntrExecution(self):
        self._check_unary(entr, scipy_entr)

    def testRelEntrExecution(self):
        # dense inputs
        left = np.random.rand(4, 3, 2)
        right = np.random.rand(4, 3, 2)
        left_t = tensor(left, chunk_size=3)
        right_t = tensor(right, chunk_size=3)

        op = rel_entr(left_t, right_t)

        actual = self.executor.execute_tensor(op, concat=True)[0]
        wanted = scipy_rel_entr(left, right)

        np.testing.assert_array_equal(actual, wanted)

        # test sparse: sparse first operand, dense second operand
        flat_values = np.array([0, 1.0, 1.01, np.nan] * 3)
        left = sps.csr_matrix(flat_values.reshape(4, 3))
        left_t = tensor(left, chunk_size=3)
        right = np.random.rand(4, 3)
        right_t = tensor(right, chunk_size=3)

        op = rel_entr(left_t, right_t)

        actual = self.executor.execute_tensor(op, concat=True)[0]

        wanted = scipy_rel_entr(left.toarray(), right)
        np.testing.assert_array_equal(actual.toarray(), wanted)
Ejemplo n.º 23
0
class Test(TestBase):
    """Execution tests for writing tensors to external stores.

    Covers TileDB, HDF5, Zarr and vineyard; each test is skipped when the
    corresponding optional module is ``None``.
    """

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest('numpy')

    @unittest.skipIf(tiledb is None, 'tiledb not installed')
    def testStoreTileDBExecution(self):
        """Store dense, single-chunk, sparse and Fortran-ordered tensors to TileDB.

        Every scenario writes into a fresh temporary directory that is removed
        in a ``finally`` clause, then reads the array back through ``tiledb``.
        """
        ctx = tiledb.Ctx()

        tempdir = tempfile.mkdtemp()
        try:
            # store TileDB dense array
            expected = np.random.rand(8, 4, 3)
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store tensor with 1 chunk to TileDB dense array
            a = arange(12)
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(np.arange(12), arr.read_direct())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store 2-d TileDB sparse array
            expected = sps.random(8, 7, density=0.1)
            a = tensor(expected, chunk_size=(3, 5))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.SparseArray(uri=tempdir, ctx=ctx) as arr:
                # rebuild a COO matrix from the stored coordinates and values
                data = arr[:, :]
                coords = data['coords']
                value = data[arr.attr(0).name]
                ij = tuple(coords[arr.domain.dim(k).name]
                           for k in range(arr.ndim))
                result = sps.coo_matrix((value, ij), shape=arr.shape)

                np.testing.assert_allclose(expected.toarray(),
                                           result.toarray())
        finally:
            shutil.rmtree(tempdir)

        tempdir = tempfile.mkdtemp()
        try:
            # store Fortran-ordered dense array; the schema is expected to
            # report column-major cell order
            expected = np.asfortranarray(np.random.rand(8, 4, 3))
            a = tensor(expected, chunk_size=(3, 3, 2))
            save = totiledb(tempdir, a, ctx=ctx)
            self.executor.execute_tensor(save)

            with tiledb.DenseArray(uri=tempdir, ctx=ctx) as arr:
                np.testing.assert_allclose(expected, arr.read_direct())
                self.assertEqual(arr.schema.cell_order, 'col-major')
        finally:
            shutil.rmtree(tempdir)

    @unittest.skipIf(h5py is None, 'h5py not installed')
    def testStoreHDF5Execution(self):
        """Store tensors to HDF5 given a filename, an open file or a dataset.

        Also checks that an unsupported target raises ``TypeError`` and that
        omitting group/dataset for a filename or file raises ``ValueError``.
        """
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        # t1 fits in one chunk (chunk_size covers the whole array), t2 does not
        t1 = tensor(raw, chunk_size=20)
        t2 = tensor(raw, chunk_size=9)

        with self.assertRaises(TypeError):
            tohdf5(object(), t2)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            with tempfile.TemporaryDirectory() as d:
                filename = os.path.join(d,
                                        f'test_store_{int(time.time())}.hdf5')

                # test 1 chunk
                r = tohdf5(filename,
                           t1,
                           group=group_name,
                           dataset=dataset_name)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # test filename
                r = tohdf5(filename,
                           t2,
                           group=group_name,
                           dataset=dataset_name)

                executor.execute_tensor(r)

                # the second input of the first store chunk must be a
                # SuccessorsExclusive op that has no inputs of its own
                rt = get_tiled(r)
                self.assertEqual(
                    type(rt.chunks[0].inputs[1].op).__name__,
                    'SuccessorsExclusive')
                self.assertEqual(len(rt.chunks[0].inputs[1].inputs), 0)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # filename without group/dataset is rejected
                with self.assertRaises(ValueError):
                    tohdf5(filename, t2)

                with h5py.File(filename, 'r') as f:
                    # test file
                    r = tohdf5(f, t2, group=group_name, dataset=dataset_name)

                # the store op is executed after the h5py file handle is closed
                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

                # open file without group/dataset is rejected
                with self.assertRaises(ValueError):
                    with h5py.File(filename, 'r') as f:
                        tohdf5(f, t2)

                with h5py.File(filename, 'r') as f:
                    # test dataset
                    ds = f[f'{group_name}/{dataset_name}']
                    # a dataset target needs no group/dataset arguments
                    r = tohdf5(ds, t2)

                executor.execute_tensor(r)

                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[f'{group_name}/{dataset_name}'])
                    np.testing.assert_array_equal(result, raw)

    @unittest.skipIf(zarr is None, 'zarr not installed')
    def testStoreZarrExecution(self):
        """Store tensors to Zarr given a filename, a full path or an array."""
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'

        t = tensor(raw, chunk_size=6)

        with self.assertRaises(TypeError):
            tozarr(object(), t)

        with tempfile.TemporaryDirectory() as d:
            filename = os.path.join(d, f'test_store_{int(time.time())}.zarr')
            path = f'{filename}/{group_name}/{dataset_name}'

            # filename plus group/dataset, with an explicit compressor
            r = tozarr(filename,
                       t,
                       group=group_name,
                       dataset=dataset_name,
                       compressor=Zstd(level=3))
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw)
            self.assertEqual(arr.compressor, Zstd(level=3))

            # full path to the dataset; the stored values are replaced
            r = tozarr(path, t + 2)
            self.executor.execute_tensor(r)

            arr = zarr.open(path)
            np.testing.assert_array_equal(arr, raw + 2)

            # an already opened zarr array as the target
            filters = [Delta(dtype='i4')]
            compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
            arr = zarr.open(path, compressor=compressor, filters=filters)

            r = tozarr(arr, t + 1)
            self.executor.execute_tensor(r)
            result = zarr.open_array(path)
            np.testing.assert_array_equal(result, raw + 1)

    @unittest.skipIf(vineyard is None, 'vineyard not installed')
    @flaky(max_runs=3)
    def testToVineyard(self):
        """Round-trip a tensor through vineyard in a local session and a cluster."""
        def run_with_given_session(session, **kw):
            # socket path comes from VINEYARD_IPC_SOCKET, with a fixed fallback
            ipc_socket = os.environ.get('VINEYARD_IPC_SOCKET',
                                        '/tmp/vineyard/vineyard.sock')
            with option_context({'vineyard.socket': ipc_socket}):
                tensor1 = tensor(np.arange(12).reshape(3, 4), chunk_size=2)
                object_id = tovineyard(tensor1).execute(
                    session=session, **kw).fetch(session=session)
                tensor2 = from_vineyard(object_id)

                tensor1_value = tensor1.execute(session=session,
                                                **kw).fetch(session=session)
                tensor2_value = tensor2.execute(session=session,
                                                **kw).fetch(session=session)
                np.testing.assert_array_equal(tensor1_value, tensor2_value)

        with new_session().as_default() as session:
            run_with_given_session(session)

        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=False) as cluster:
            with new_session(cluster.endpoint).as_default() as session:
                run_with_given_session(session, timeout=_exec_timeout)
Ejemplo n.º 24
0
class Test(unittest.TestCase):
    """Execution tests for building and querying Faiss indexes."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testManualBuildFaissIndex(self):
        """Build indexes with explicit index strings and inspect the results."""
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        query = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

        reference = NearestNeighbors(algorithm='kd_tree')
        reference.fit(train)
        _, expected_indices = reference.kneighbors(query, 5)

        for index_type in ['object', 'filename', 'bytes']:
            # test brute-force search
            X = mt.tensor(train, chunk_size=10)
            index = build_faiss_index(X, 'Flat', None, random_state=0,
                                      same_distribution=True,
                                      return_index_type=index_type)
            chunk_indexes = self.executor.execute_tileable(index)

            # merge the per-chunk indexes into a single sharded index
            faiss_index = faiss.IndexShards(dim)
            for chunk_index in chunk_indexes:
                faiss_index.add_shard(
                    _load_index(None, index.op, chunk_index, -1))

            faiss_index.nprob = 10
            _, indices = faiss_index.search(query, k=5)

            np.testing.assert_array_equal(indices, expected_indices.fetch())

        # test one chunk, brute force
        X = mt.tensor(train, chunk_size=50)
        index = build_faiss_index(X, 'Flat', None, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        faiss_index.nprob = 10
        _, indices = faiss_index.search(query, k=5)

        np.testing.assert_array_equal(indices, expected_indices.fetch())

        # test train, same distribution
        X = mt.tensor(train, chunk_size=10)
        index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(faiss_index, faiss.IndexIVFFlat)
        self.assertEqual(faiss_index.ntotal, n_train)
        self.assertEqual(len(get_tiled(index).chunks), 1)

        # test train, distributions are variant
        X = mt.tensor(train, chunk_size=10)
        index = build_faiss_index(X, 'IVF10,Flat', None, random_state=0,
                                  same_distribution=False,
                                  return_index_type='object')
        chunk_indexes = self.executor.execute_tileable(index)

        self.assertEqual(len(chunk_indexes), 5)
        for chunk_index in chunk_indexes:
            self.assertIsInstance(chunk_index, faiss.IndexIVFFlat)
            self.assertEqual(chunk_index.ntotal, 10)

        # test one chunk, train
        X = mt.tensor(train, chunk_size=50)
        index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                                  same_distribution=True,
                                  return_index_type='object')
        faiss_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(faiss_index, faiss.IndexIVFFlat)
        self.assertEqual(faiss_index.ntotal, n_train)

        # test wrong index
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'unknown_index', None)

        # test unknown metric
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'Flat', None, metric='unknown_metric')

    def testFaissQuery(self):
        """Query a flat index and compare with ``NearestNeighbors``."""
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        query = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        tensor_pairs = [
            # multi chunks
            (mt.tensor(train, chunk_size=(20, 5)), mt.tensor(query, chunk_size=5)),
            # one chunk
            (mt.tensor(train, chunk_size=50), mt.tensor(query, chunk_size=10))
        ]

        for X, Y in tensor_pairs:
            for metric in ['l2', 'cosine']:
                faiss_index = build_faiss_index(X, 'Flat', None, metric=metric,
                                                random_state=0,
                                                return_index_type='object')
                distance_t, indices_t = faiss_query(faiss_index, Y, 5, nprobe=10)
                distance, indices = self.executor.execute_tensors(
                    [distance_t, indices_t])

                reference = NearestNeighbors(metric=metric)
                reference.fit(train)
                expected_distance, expected_indices = reference.kneighbors(query, 5)

                np.testing.assert_array_equal(indices, expected_indices.fetch())
                np.testing.assert_almost_equal(distance, expected_distance.fetch())

    def testGenIndexStringAndSampleCount(self):
        """Check the generated index string / sample count for given inputs."""
        d = 32

        # (sample count, accuracy flag, memory setting, expected return value,
        #  expected ``is_trained`` of the factory-built index; None = not checked)
        valid_cases = [
            # accuracy=True, could be Flat only
            (10 ** 9, True, 'minimum', ('Flat', None), None),
            # no memory concern
            (10 ** 5, False, 'maximum', ('HNSW32', None), True),
            # memory concern not much
            (10 ** 5, False, 'high', ('IVF1580,Flat', 47400), False),
            # memory quite important
            (5 * 10 ** 6, False, 'low',
             ('PCAR16,IVF65536_HNSW32,SQ8', 32 * 65536), False),
            # memory very important
            (10 ** 8, False, 'minimum',
             ('OPQ16_32,IVF1048576_HNSW32,PQ16', 64 * 65536), False),
            (10 ** 10, False, 'low',
             ('PCAR16,IVF1048576_HNSW32,SQ8', 64 * 65536), False),
        ]
        for n_sample, accuracy, memory_level, wanted, trained in valid_cases:
            ret = _gen_index_string_and_sample_count(
                (n_sample, d), None, accuracy, memory_level)
            self.assertEqual(ret, wanted)
            if trained is None:
                continue
            index = faiss.index_factory(d, ret[0])
            if trained:
                self.assertTrue(index.is_trained)
            else:
                self.assertFalse(index.is_trained)

        invalid_cases = [
            # M > 64 raise error
            ('maximum', {'M': 128}),
            # M > 64
            ('minimum', {'M': 128}),
            # dim should be multiple of M
            ('minimum', {'M': 16, 'dim': 17}),
            ('low', {'k': 5}),
        ]
        for memory_level, extra in invalid_cases:
            with self.assertRaises(ValueError):
                _gen_index_string_and_sample_count(
                    (10 ** 5, d), None, False, memory_level, **extra)

    def testAutoIndex(self):
        """Build an index without an explicit index string and query it."""
        dim = 8
        n_train = 50
        n_query = 10
        train = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        query = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        for chunk_size in (50, 20):
            X = mt.tensor(train, chunk_size=chunk_size)

            faiss_index = build_faiss_index(X, random_state=0,
                                            return_index_type='object')
            _, indices_t = faiss_query(faiss_index, query, 5, nprobe=10)
            indices = self.executor.execute_tensor(indices_t, concat=True)[0]

            reference = NearestNeighbors()
            reference.fit(train)
            expected_indices = reference.kneighbors(query, 5,
                                                    return_distance=False)

            np.testing.assert_array_equal(indices, expected_indices)
Ejemplo n.º 25
0
class Test(TestBase):
    def setUp(self):
        # Create the executor used by the tests and switch the global default
        # chunk size to 10; the previous value is remembered so that tearDown
        # can restore it.
        self.executor = ExecutorForTest('numpy')
        self.old_chunk = options.chunk_size
        options.chunk_size = 10

    def tearDown(self):
        # Restore the default chunk size saved in setUp (individual tests may
        # have overridden it as well).
        options.chunk_size = self.old_chunk

    def testBoolIndexingExecution(self):
        # Boolean-mask indexing: a mask derived from the tensor itself, a mask
        # built from a 2-d slice of the data, and a Fortran-ordered input.
        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=3)

        # mask computed from the tensor itself
        selected = t[t < .5]
        mock_sizes = self.executor.execute_tensor(selected, mock=True)
        chunks = self.executor.execute_tensor(selected)

        estimated = sum(size[0] for size in mock_sizes)
        self.assertEqual(estimated, t.nbytes)
        np.testing.assert_array_equal(np.sort(np.concatenate(chunks)),
                                      np.sort(data[data < .5]))

        # mask built from a 2-d slice of the data
        mask = tensor(data[:, :, 0, 0], chunk_size=3) < .5
        got = self.executor.execute_tensor(t[mask], concat=True)[0]

        wanted = data[data[:, :, 0, 0] < .5]
        self.assertEqual(sum(part.size for part in got), wanted.size)
        self.assertEqual(got.shape, wanted.shape)

        # same 2-d mask on Fortran-ordered data: memory layout flags must agree
        data = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        t = tensor(data, chunk_size=3)

        mask = tensor(data[:, :, 0, 0], chunk_size=3) < .5
        got = self.executor.execute_tensor(t[mask], concat=True)[0]
        wanted = data[data[:, :, 0, 0] < .5].copy('A')

        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], wanted.flags[flag])

    def testFancyIndexingNumpyExecution(self):
        # test fancy index of type numpy ndarray
        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=(2, 3, 2, 3))

        def check(select):
            # apply one indexing expression to both the tensor and the raw
            # ndarray and compare the executed result with numpy's answer
            picked = select(t)
            got = self.executor.execute_tensor(picked, concat=True)[0]
            np.testing.assert_array_equal(got, select(data))
            return picked

        rows = [9, 10, 3, 1, 8, 10]
        check(lambda a: a[rows])

        perm = np.random.permutation(8)
        check(lambda a: a[:2, ..., perm])

        cols = [1, 3, 9, 10]
        check(lambda a: a[..., cols, :5])

        first = [8, 10, 3, 1, 9, 10]
        second = [1, 3, 9, 10, 2, 7]
        check(lambda a: a[first, :, second])

        first = [1, 3, 5, 7, 9, 10]
        second = [1, 9, 9, 10, 2, 7]
        ordered = check(lambda a: a[first, :, second])
        # fancy index is ordered, no concat required
        self.assertGreater(len(get_tiled(ordered).nsplits[0]), 1)

        first = [[8, 10, 3], [1, 9, 10]]
        second = [[1, 3, 9], [10, 2, 7]]
        check(lambda a: a[first, :, second])

        first = [[1, 3], [3, 7], [7, 7]]
        second = [1, 9]
        check(lambda a: a[0, first, :, second])

    def testFancyIndexingTensorExecution(self):
        # test fancy index of type tensor

        def verify(lazy, wanted):
            # execute the lazy tensor and compare it with the numpy result
            got = self.executor.execute_tensor(lazy, concat=True)[0]
            np.testing.assert_array_equal(got, wanted)
            return got

        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=(2, 3, 2, 3))

        idx = [8, 10, 3, 1, 9, 10]
        verify(t[tensor(idx, chunk_size=4)], data[idx])

        idx = np.random.permutation(8)
        verify(t[:2, ..., tensor(idx, chunk_size=3)], data[:2, ..., idx])

        idx = [1, 3, 9, 10]
        verify(t[..., tensor(idx), :5], data[..., idx, :5])

        idx1 = [8, 10, 3, 1, 9, 10]
        idx2 = [1, 3, 9, 10, 2, 7]
        verify(t[tensor(idx1, chunk_size=4), :, tensor(idx2, chunk_size=3)],
               data[idx1, :, idx2])

        idx1 = [1, 3, 5, 7, 9, 10]
        idx2 = [1, 9, 9, 10, 2, 7]
        verify(t[tensor(idx1, chunk_size=3), :, tensor(idx2, chunk_size=4)],
               data[idx1, :, idx2])

        idx1 = [[8, 10, 3], [1, 9, 10]]
        idx2 = [[1, 3, 9], [10, 2, 7]]
        verify(t[tensor(idx1), :, tensor(idx2, chunk_size=2)],
               data[idx1, :, idx2])

        idx1 = [[1, 3], [3, 7], [7, 7]]
        idx2 = [1, 9]
        verify(t[0, tensor(idx1, chunk_size=(2, 1)), :, tensor(idx2)],
               data[0, idx1, :, idx2])

        # index produced by another tensor operation (argmax)
        mat = np.random.rand(30, 30)
        m = tensor(mat, chunk_size=(13, 17))
        verify(m[m.argmax(axis=0), arange(30)],
               mat[mat.argmax(axis=0), np.arange(30)])

        # test one chunk
        t = tensor(data, chunk_size=20)

        idx = [8, 10, 3, 1, 9, 10]
        verify(t[tensor(idx, chunk_size=20)], data[idx])

        idx1 = [[1, 3], [3, 7], [7, 7]]
        idx2 = [1, 9]
        verify(t[0, tensor(idx1), :, tensor(idx2)], data[0, idx1, :, idx2])

        # test order
        data = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        t = tensor(data, chunk_size=(2, 3, 2, 3))

        idx = [8, 10, 3, 1, 9, 10]
        wanted = data[idx].copy('A')
        got = verify(t[tensor(idx, chunk_size=4)], wanted)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(got.flags[flag], wanted.flags[flag])

    def testSliceExecution(self):
        # Basic slicing (steps, negative bounds, integer index, None) on dense,
        # sparse and Fortran-ordered inputs.
        def run(lazy):
            return self.executor.execute_tensor(lazy, concat=True)[0]

        def assert_same_layout(got, wanted):
            # values and contiguity flags must both match
            np.testing.assert_array_equal(got, wanted)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], wanted.flags[flag])

        data = np.random.random((11, 8, 12, 14))
        t = tensor(data, chunk_size=3)

        np.testing.assert_array_equal(
            run(t[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]),
            data[2:9:2, 3:7, -1:-9:-2, 12:-11:-4])

        np.testing.assert_equal(run(t[-4, 2:]), data[-4, 2:])

        sparse = sps.random(12, 14, density=.1)
        t = tensor(sparse, chunk_size=3)

        np.testing.assert_equal(run(t[-1:-9:-2, 12:-11:-4]).toarray(),
                                sparse.toarray()[-1:-9:-2, 12:-11:-4])

        # test order
        data = np.asfortranarray(np.random.random((11, 8, 12, 14)))
        t = tensor(data, chunk_size=3)

        assert_same_layout(run(t[2:9:2, 3:7, -1:-9:-2, 12:-11:-4]),
                           data[2:9:2, 3:7, -1:-9:-2, 12:-11:-4].copy('A'))

        assert_same_layout(run(t[0:13, :, None]),
                           data[0:13, :, None].copy('A'))

    def testMixedIndexingExecution(self):
        # Indexing expressions that mix slices, boolean masks, None, Ellipsis
        # and integer (fancy) indices.
        rng = np.random.RandomState(0)
        data = rng.random((11, 8, 12, 13))
        t = tensor(data, chunk_size=3)

        np_mask = data[0, :, 0, 0] < .5
        mask = tensor(data[0, :, 0, 0], chunk_size=3) < .5
        mixed = t[10::-2, mask, None, ..., :5]
        mock_sizes = self.executor.execute_tensor(mixed, mock=True)
        got = self.executor.execute_tensor(mixed, concat=True)[0]

        # the mock size is compared against the shape obtained by replacing
        # axis 1 with the full length of the mask
        bound_shape = list(mixed.shape)
        bound_shape[1] = mask.shape[0]
        self.assertEqual(sum(size[0] for size in mock_sizes),
                         int(np.prod(bound_shape) * mixed.dtype.itemsize))
        np.testing.assert_array_equal(
            got, data[10::-2, np_mask, None, ..., :5])

        # the same mask given once as ndarray and once as tensor
        values = np.random.random(8)
        np_mask = values < .5
        for mask in (np_mask, tensor(values, chunk_size=2) < .5):
            got = self.executor.execute_tensor(t[-2::-3, mask, ...],
                                               concat=True)[0]

            np.testing.assert_array_equal(got, data[-2::-3, np_mask, ...])

        # test multiple bool index and fancy index
        mask1 = np.zeros(11, dtype=bool)
        mask1[rng.permutation(11)[:5]] = True
        mask2 = np.zeros(12, dtype=bool)
        mask2[rng.permutation(12)[:5]] = True
        fancy = np.random.randint(13, size=5)

        wanted = data[mask1, ..., mask2, fancy]

        got = self.executor.execute_tensor(t[mask1, ..., mask2, fancy],
                                           concat=True)[0]
        np.testing.assert_array_equal(got, wanted)

        # same expression with tensor indices, run inside a test context
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            lazy = t[tensor(mask1), ..., tensor(mask2), tensor(fancy)]
            got = executor.execute_tensors([lazy])[0]
            np.testing.assert_array_equal(got, wanted)

    def testSetItemExecution(self):
        # Item assignment on tensors (scalar, tensor value, Fortran order,
        # boolean masks), each compared with the same assignment on an ndarray.
        rs = np.random.RandomState(0)

        # `data` keeps the base array; `raw` is rebound to copies of it for the
        # slice-assignment sections below
        raw = data = rs.randint(0, 10, size=(11, 8, 12, 13))
        arr = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # scalar assigned through slices plus an integer index
        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        raw = data
        shape = raw[idx].shape

        arr2 = tensor(raw.copy(), chunk_size=3)
        raw = raw.copy()

        # float32 tensor value whose last axis has length 1
        replace = rs.randint(10, 20, size=shape[:-1] + (1, )).astype('f4')
        arr2[idx] = tensor(replace, chunk_size=4)
        res = self.executor.execute_tensor(arr2, concat=True)[0]

        raw[idx] = replace
        np.testing.assert_array_equal(res, raw)

        # same scalar assignment on Fortran-ordered data
        raw = np.asfortranarray(np.random.randint(0, 10, size=(11, 8, 12, 13)))
        arr = tensor(raw.copy('A'), chunk_size=3)
        raw = raw.copy('A')

        idx = slice(2, 9, 2), slice(3, 7), slice(-1, -9, -2), 2
        arr[idx] = 20
        res = self.executor.execute_tensor(arr, concat=True)[0]

        raw[idx] = 20
        np.testing.assert_array_equal(res, raw)
        self.assertEqual(res.flags['C_CONTIGUOUS'], raw.flags['C_CONTIGUOUS'])
        self.assertEqual(res.flags['F_CONTIGUOUS'], raw.flags['F_CONTIGUOUS'])

        # test bool indexing set
        # NOTE: `raw` is `data` itself here (not a copy), so the assignment to
        # `raw` further down also modifies `data` for the following sections.
        raw = data

        arr = tensor(raw.copy(), chunk_size=3)
        raw1 = rs.rand(11)
        arr[tensor(raw1, chunk_size=4) < 0.6, 2:7] = 3
        res = self.executor.execute_tileable(arr, concat=True)[0]

        raw[raw1 < 0.6, 2:7] = 3
        np.testing.assert_array_equal(res, raw)

        # two boolean masks, one per axis
        raw = np.random.randint(3, size=10).astype(np.int64)
        raw2 = np.arange(3)

        arr = zeros((10, 3))
        arr[tensor(raw) == 1, tensor(raw2) == 1] = 1
        res = self.executor.execute_tileable(arr, concat=True)[0]

        expected = np.zeros((10, 3))
        expected[raw == 1, raw2 == 1] = 1
        np.testing.assert_array_equal(res, expected)

        # NOTE(review): the `executor` returned here is never used; the call
        # inside the block goes through self.executor - confirm this is intended.
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            # again aliases `data`; the in-place assignment below modifies it
            raw = data

            arr = tensor(raw.copy(), chunk_size=3)
            raw1 = rs.rand(11)
            set_data = rs.rand((raw1 < 0.8).sum(), 8, 12, 13)
            arr[tensor(raw1, chunk_size=4) < 0.8] = tensor(set_data)

            res = self.executor.execute_tileables([arr])[0]

            raw[raw1 < 0.8] = set_data
            np.testing.assert_array_equal(res, raw)

        # test error
        with self.assertRaises(ValueError):
            t = tensor(raw, chunk_size=3)
            t[0, 0, 0, 0] = zeros(2, chunk_size=10)
            _ = self.executor.execute_tensor(t)

    def testSetItemStructuredExecution(self):
        # Item assignment on a tensor with a nested structured dtype, checked
        # against the same sequence of assignments on an ndarray.
        nested = np.dtype([('a', np.int16), ('b', np.int64)])
        rec_type = np.dtype([('a', np.int32), ('b', np.double), ('c', nested)])

        raw = np.zeros((4, 5), dtype=rec_type)
        arr = tensor(raw.copy(), chunk_size=3)

        def assignments():
            # (index, value) pairs, rebuilt on every call so the tensor and the
            # ndarray each receive their own value objects
            return [
                (np.s_[1:4, 1], (3, 4., (5, 6))),
                (np.s_[1:4, 2], 8),
                (np.s_[1:3], np.arange(5)),
                (np.s_[2:4], np.arange(10).reshape(2, 5)),
                (np.s_[0], np.arange(5)),
            ]

        # all assignments go to the tensor first, then to the ndarray
        for target in (arr, raw):
            for key, value in assignments():
                target[key] = value

        res = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(arr.dtype, raw.dtype)
        self.assertEqual(arr.shape, raw.shape)
        np.testing.assert_array_equal(res, raw)

    def testTakeExecution(self):
        # take() without an axis, along axis 1, and writing into `out`.
        def run(lazy):
            return self.executor.execute_tensor(lazy, concat=True)[0]

        data = np.random.rand(10, 20, 30)
        t = tensor(data, chunk_size=10)

        np.testing.assert_array_equal(run(t.take([4, 1, 2, 6, 200])),
                                      np.take(data, [4, 1, 2, 6, 200]))

        np.testing.assert_array_equal(run(take(t, [5, 19, 2, 13], axis=1)),
                                      np.take(data, [5, 19, 2, 13], axis=1))

        # this `out` tensor is expected to be rejected with ValueError
        with self.assertRaises(ValueError):
            take(t, [1, 3, 4], out=tensor(np.random.rand(4)))

        # the result is read back through `out`, not through the return value
        out = tensor([1, 2, 3, 4])
        take(t, [4, 19, 2, 8], out=out)

        np.testing.assert_array_equal(run(out),
                                      np.take(data, [4, 19, 2, 8]))

    def testCompressExecution(self):
        # compress() compared with np.compress for integer and boolean
        # conditions, with and without axis, plus memory-order checks.
        def run(lazy):
            return self.executor.execute_tensor(lazy, concat=True)[0]

        def assert_same_layout(got, wanted):
            # values and contiguity flags must both match
            np.testing.assert_array_equal(got, wanted)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], wanted.flags[flag])

        data = np.array([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        for axis in (0, 1):
            np.testing.assert_array_equal(
                run(compress([0, 1], a, axis=axis)),
                np.compress([0, 1], data, axis=axis))

        # method form, no axis given
        np.testing.assert_array_equal(run(a.compress([0, 1, 1])),
                                      np.compress([0, 1, 1], data))

        for mask, axis in (([False, True, True], 0), ([False, True], 1)):
            np.testing.assert_array_equal(
                run(compress(mask, a, axis=axis)),
                np.compress(mask, data, axis=axis))

        with self.assertRaises(np.AxisError):
            compress([0, 1, 1], a, axis=1)

        # test order
        data = np.asfortranarray([[1, 2], [3, 4], [5, 6]])
        a = tensor(data, chunk_size=1)

        assert_same_layout(run(compress([0, 1, 1], a, axis=0)),
                           np.compress([0, 1, 1], data, axis=0))

        lazy = compress([0, 1, 1], a, axis=0,
                        out=tensor(np.empty((2, 2), order='F', dtype=int)))
        got = run(lazy)
        wanted = np.compress([0, 1, 1], data, axis=0,
                             out=np.empty((2, 2), order='F', dtype=int))
        assert_same_layout(got, wanted)

    def testExtractExecution(self):
        # extract() with a tensor condition (multiples of 3) against np.extract.
        values = np.arange(12).reshape((3, 4))
        a = tensor(values, chunk_size=2)

        picked = extract(mod(a, 3) == 0, a)
        got = self.executor.execute_tensor(picked, concat=True)[0]

        np.testing.assert_array_equal(
            got, np.extract(np.mod(values, 3) == 0, values))

    def testChooseExecution(self):
        # choose() compared with np.choose: the three modes, scalar choices,
        # broadcasting choices, and memory order with and without `out`.
        options.chunk_size = 2

        def run(lazy):
            return self.executor.execute_tensor(lazy, concat=True)[0]

        def assert_same_layout(got, wanted):
            # values and contiguity flags must both match
            np.testing.assert_array_equal(got, wanted)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], wanted.flags[flag])

        choices = [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23],
                   [30, 31, 32, 33]]
        cases = (
            ([2, 3, 1, 0], {}),
            ([2, 4, 1, 0], {'mode': 'clip'}),  # 4 goes to 3 (4-1)
            ([2, 4, 1, 0], {'mode': 'wrap'}),  # 4 goes to (4 mod 4)
        )
        for selector, kwargs in cases:
            np.testing.assert_array_equal(
                run(choose(selector, choices, **kwargs)),
                np.choose(selector, choices, **kwargs))

        # 2-d selector with scalar choices
        selector = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
        choices = [-10, 10]
        np.testing.assert_array_equal(run(choose(selector, choices)),
                                      np.choose(selector, choices))

        # selector and choices of different shapes
        selector = np.array([0, 1]).reshape((2, 1, 1))
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1))
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5))
        np.testing.assert_array_equal(run(choose(selector, (c1, c2))),
                                      np.choose(selector, (c1, c2)))

        # test order
        selector = np.array([0, 1]).reshape((2, 1, 1), order='F')
        c1 = np.array([1, 2, 3]).reshape((1, 3, 1), order='F')
        c2 = np.array([-1, -2, -3, -4, -5]).reshape((1, 1, 5), order='F')

        res = run(choose(selector, (c1, c2)))
        assert_same_layout(res, np.choose(selector, (c1, c2)))

        lazy = choose(selector, (c1, c2),
                      out=tensor(np.empty(res.shape, order='F')))
        wanted = np.choose(selector, (c1, c2),
                           out=np.empty(res.shape, order='F'))
        assert_same_layout(run(lazy), wanted)

    def testUnravelExecution(self):
        # unravel_index() on a chunked tensor, stacked and compared with numpy.
        dims = (7, 6)
        lazy = stack(unravel_index(tensor([22, 41, 37], chunk_size=1), dims))

        got = self.executor.execute_tensor(lazy, concat=True)[0]
        wanted = np.stack(np.unravel_index([22, 41, 37], dims))

        np.testing.assert_array_equal(got, wanted)

    def testNonzeroExecution(self):
        # nonzero() as a function and as a method, compared with np.nonzero.
        def run(lazy):
            return self.executor.execute_tensor(lazy, concat=True)[0]

        matrix = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]])
        x = tensor(matrix, chunk_size=2)

        np.testing.assert_array_equal(run(hstack(nonzero(x))),
                                      np.hstack(np.nonzero(matrix)))

        np.testing.assert_array_equal(run(hstack((x > 1).nonzero())),
                                      np.hstack(np.nonzero(matrix > 1)))

    def testFlatnonzeroExecution(self):
        # flatnonzero() on a chunked arange compared with np.flatnonzero.
        lazy = flatnonzero(arange(-2, 3, chunk_size=2))

        got = self.executor.execute_tensor(lazy, concat=True)[0]

        np.testing.assert_equal(got, np.flatnonzero(np.arange(-2, 3)))

    def testFillDiagonalExecution(self):
        # fill_diagonal() is compared with np.fill_diagonal on 2-d inputs
        # (dense and sparse) and on a 3-d cube, for scalar, list, ndarray and
        # tensor values, with one chunk and with several chunks.

        def densify(x):
            # fresh dense copy that np.fill_diagonal can modify in place
            if hasattr(x, 'nnz'):
                # sparse: toarray() is the explicit conversion already used
                # elsewhere in this file; the `.A` shorthand is not available
                # on all SciPy sparse containers
                return x.toarray()
            return x.copy()

        def check(raw, chunk_size, val, np_val=None, **kw):
            # Fill the diagonal of a tensor built from `raw` and compare with
            # numpy. `np_val` is the value handed to numpy when it has to
            # differ from `val` (e.g. `val` is a tensor); extra keywords such
            # as `wrap` are forwarded unchanged to both implementations.
            t = tensor(raw, chunk_size=chunk_size)
            fill_diagonal(t, val, **kw)

            res = self.executor.execute_tensor(t, concat=True)[0]
            expected = densify(raw)
            np.fill_diagonal(expected, val if np_val is None else np_val, **kw)

            np.testing.assert_array_equal(np.asarray(res), expected)

        # 2-d
        raws = [
            np.random.rand(30, 11),
            np.random.rand(15, 15),
            np.random.rand(11, 30),
            sps.random(30, 11, density=0.1, format='csr')
        ]

        for raw in raws:
            # test 1 chunk, wrap=False
            check(raw, 30, 1)
            # test 1 chunk, wrap=True
            check(raw, 30, 1, wrap=True)

            # test multiple chunks, wrap=False
            check(raw, (12, 4), 1)
            check(raw, (4, 12), 1)
            # test multiple chunk, val with list type
            check(raw, (12, 4), [1, 2, 3])
            # test multiple chunk, val with tensor type
            check(raw, (12, 4), tensor([1, 2, 3]), np_val=[1, 2, 3])

            # test multiple chunks, wrap=True
            check(raw, (12, 4), 1, wrap=True)
            check(raw, (4, 12), 1, wrap=True)
            # test multiple chunk, val with list type
            check(raw, (12, 4), [1, 2, 3], wrap=True)
            # test multiple chunk, val with tensor type
            check(raw, (12, 4), tensor([[1, 2], [3, 4]]),
                  np_val=[1, 2, 3, 4], wrap=True)

        # 3-d
        raw = np.random.rand(11, 11, 11)

        # wrap = True does not take effect when ndim > 2
        expected = raw.copy()
        np.fill_diagonal(expected, 1)
        expected2 = raw.copy()
        np.fill_diagonal(expected2, 1, wrap=True)
        np.testing.assert_array_equal(expected, expected2)

        # test 1 chunk, then multiple chunks, each without and with wrap
        for chunk_size in (30, (3, 4, 5)):
            check(raw, chunk_size, 1)
            check(raw, chunk_size, 1, wrap=True)

        # test val with list type
        check(raw, (3, 4, 5), [[1, 2], [3, 4]], np_val=[1, 2, 3, 4])

        # test val with tensor type
        check(raw, (3, 4, 5), tensor([1, 2, 3]), np_val=[1, 2, 3])

        # test val with tensor type which ndim == 0
        check(raw, (3, 4, 5), tensor([1, 2, 3]).sum(), np_val=6)

        # test val with ndarray type which size is too long
        check(raw, (3, 4, 5), np.arange(20))
Ejemplo n.º 26
0
class Test(TestBase):
    def setUp(self):
        """Run the base-class setup, then attach a fresh ``ExecutorForTest``."""
        super().setUp()
        executor = ExecutorForTest()
        self.executor = executor

    def testFromPandasDataFrameExecution(self):
        """Round-trip a pandas DataFrame with a two-array index through Mars."""
        index_levels = [np.arange(20), np.arange(20, 0, -1)]
        expected = pd.DataFrame(np.random.rand(20, 30), index=index_levels)

        mars_df = from_pandas_df(expected, chunk_size=(13, 21))
        executed = self.executor.execute_dataframe(mars_df, concat=True)

        # The concatenated result must equal the frame we started from.
        pd.testing.assert_frame_equal(expected, executed[0])

    def testFromPandasSeriesExecution(self):
        """Round-trip a named pandas Series with a two-array index through Mars."""
        index_levels = [np.arange(20), np.arange(20, 0, -1)]
        expected = pd.Series(np.random.rand(20), index=index_levels, name='a')

        mars_series = from_pandas_series(expected, chunk_size=13)
        executed = self.executor.execute_dataframe(mars_series, concat=True)

        # The concatenated result must equal the series we started from.
        pd.testing.assert_series_equal(expected, executed[0])

    def testInitializerExecution(self):
        """Build ``md.DataFrame`` and ``md.Series`` directly from pandas objects."""

        def fetch(mars_obj):
            # Execute a Mars object and return its concatenated result.
            return self.executor.execute_dataframe(mars_obj, concat=True)[0]

        def two_array_index():
            return [np.arange(20), np.arange(20, 0, -1)]

        raw_frame = pd.DataFrame(np.random.rand(20, 30),
                                 index=two_array_index())
        mars_frame = md.DataFrame(raw_frame, chunk_size=(15, 10))
        pd.testing.assert_frame_equal(raw_frame, fetch(mars_frame))

        raw_series = pd.Series(np.random.rand(20),
                               index=two_array_index(),
                               name='a')
        mars_series = md.Series(raw_series, chunk_size=7)
        pd.testing.assert_series_equal(raw_series, fetch(mars_series))

    def testSeriesFromTensor(self):
        """Create ``md.Series`` from tensors, with and without name and index."""

        def fetch(mars_series):
            # Execute a Mars series and return its concatenated result.
            return self.executor.execute_dataframe(mars_series,
                                                   concat=True)[0]

        values = np.random.rand(10)

        # tensor created without an explicit chunk_size, series gets a name
        named = md.Series(mt.tensor(values), name='a')
        pd.testing.assert_series_equal(fetch(named),
                                       pd.Series(values, name='a'))

        # tensor with chunk_size=3, no name
        chunked = md.Series(mt.tensor(values, chunk_size=3))
        pd.testing.assert_series_equal(fetch(chunked), pd.Series(values))

        # tensor that is not backed by a numpy array
        from_ones = md.Series(mt.ones((10, ), chunk_size=4))
        pd.testing.assert_series_equal(fetch(from_ones),
                                       pd.Series(np.ones(10, )))

        # index supplied as a tensor chunked differently from the data
        index_values = np.random.rand(10)
        indexed = md.Series(mt.tensor(values, chunk_size=3),
                            name='a',
                            index=mt.tensor(index_values, chunk_size=4))
        pd.testing.assert_series_equal(
            fetch(indexed),
            pd.Series(values, name='a', index=index_values))

    def testFromTensorExecution(self):
        """Build DataFrames from Mars tensors and compare them with pandas.

        Covers a random 2-d tensor, explicit ``index``/``columns``, a 1-d
        tensor, an index supplied as a tensor, and frames assembled from
        several 1-d tensors via ``dataframe_from_1d_tensors``.
        """

        def fetch_df(frame):
            # Execute a Mars DataFrame and return its concatenated result.
            return self.executor.execute_dataframe(frame, concat=True)[0]

        def fetch_tensor(t):
            # Execute a Mars tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        # random 2-d tensor: both axes are expected to be RangeIndex(0, 10)
        random_t = mt.random.rand(10, 10, chunk_size=5)
        random_df = dataframe_from_tensor(random_t)
        expected = pd.DataFrame(fetch_tensor(random_t))
        actual = fetch_df(random_df)
        pd.testing.assert_index_equal(actual.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(actual.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(actual, expected)

        # test converted with specified index_value and columns
        labelled_t = mt.random.rand(2, 2, chunk_size=1)
        labelled_df = dataframe_from_tensor(labelled_t,
                                            index=pd.Index(['a', 'b']),
                                            columns=pd.Index([3, 4]))
        actual = fetch_df(labelled_df)
        pd.testing.assert_index_equal(actual.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(actual.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        vector_df = dataframe_from_tensor(mt.array([1, 2, 3]))
        actual = fetch_df(vector_df)
        expected = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(expected, actual)

        # test converted from identical chunks
        ones_t = mt.ones((10, 10), chunk_size=3)
        actual = fetch_df(dataframe_from_tensor(ones_t))
        expected = pd.DataFrame(fetch_tensor(ones_t))
        pd.testing.assert_frame_equal(expected, actual)

        # from tensor with given index
        ones_t = mt.ones((10, 10), chunk_size=3)
        indexed_df = dataframe_from_tensor(ones_t, index=np.arange(0, 20, 2))
        actual = fetch_df(indexed_df)
        expected = pd.DataFrame(fetch_tensor(ones_t),
                                index=np.arange(0, 20, 2))
        pd.testing.assert_frame_equal(expected, actual)

        # from tensor with given index that is a tensor
        raw_values = np.random.rand(10, 10)
        values_t = mt.tensor(raw_values, chunk_size=3)
        raw_index = np.random.rand(10)
        index_t = mt.tensor(raw_index, chunk_size=4)
        actual = fetch_df(dataframe_from_tensor(values_t, index=index_t))
        expected = pd.DataFrame(raw_values, index=raw_index)
        pd.testing.assert_frame_equal(expected, actual)

        # from tensor with given columns
        ones_t = mt.ones((10, 10), chunk_size=3)
        named_df = dataframe_from_tensor(ones_t, columns=list('abcdefghij'))
        actual = fetch_df(named_df)
        expected = pd.DataFrame(fetch_tensor(ones_t),
                                columns=list('abcdefghij'))
        pd.testing.assert_frame_equal(expected, actual)

        # from 1d tensors: one float, one integer and one string column
        float_col = np.random.rand(8)
        int_col = np.random.randint(10, size=8)
        str_col = [
            ''.join(np.random.choice(list(printable), size=6))
            for _ in range(8)
        ]
        named_columns = [('a', float_col), ('b', int_col), ('c', str_col)]
        column_tensors = [
            mt.tensor(values, chunk_size=3) for _, values in named_columns
        ]
        columns_df = dataframe_from_1d_tensors(
            column_tensors, columns=[name for name, _ in named_columns])
        actual = fetch_df(columns_df)
        expected = pd.DataFrame(OrderedDict(named_columns))
        pd.testing.assert_frame_equal(actual, expected)

        # from 1d tensors and specify index with a tensor
        raw_index = np.random.rand(8)
        index_t = mt.tensor(raw_index, chunk_size=4)
        columns_df = dataframe_from_1d_tensors(
            column_tensors,
            columns=[name for name, _ in named_columns],
            index=index_t)
        actual = fetch_df(columns_df)
        expected = pd.DataFrame(OrderedDict(named_columns), index=raw_index)
        pd.testing.assert_frame_equal(actual, expected)

    def testFromRecordsExecution(self):
        """Run ``from_records`` on a Mars and a NumPy structured array."""

        def fetch(frame):
            # Execute a Mars DataFrame and return its concatenated result.
            return self.executor.execute_dataframe(frame, concat=True)[0]

        record_dtype = np.dtype([('x', 'int'), ('y', 'double'),
                                 ('z', '<U16')])

        np_records = np.ones((10, ), dtype=record_dtype)
        expected = pd.DataFrame.from_records(np_records,
                                             index=pd.RangeIndex(10))

        # from structured array of mars
        mars_records = mt.ones((10, ), dtype=record_dtype, chunk_size=3)
        from_mars = fetch(from_records(mars_records))
        pd.testing.assert_frame_equal(from_mars, expected)

        # from structured array of numpy
        from_numpy = fetch(from_records(np_records))
        pd.testing.assert_frame_equal(from_numpy, expected)

    def testReadCSVExecution(self):
        """Compare ``md.read_csv`` with pandas for a range of CSV inputs.

        Each scenario writes its fixture into its own temporary directory,
        reads it through Mars twice -- once without and once with an explicit
        ``chunk_bytes`` -- and compares the concatenated result with the
        pandas reference.  The directories are managed by
        ``tempfile.TemporaryDirectory`` so they are removed when the block
        exits, including when an assertion fails.
        """

        def mars_read(path, **kwargs):
            # Execute ``md.read_csv(path, **kwargs)`` and return the
            # concatenated result.
            return self.executor.execute_dataframe(md.read_csv(path, **kwargs),
                                                   concat=True)[0]

        def small_frame():
            # 3x3 integer frame shared by the first two scenarios.
            return pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                                columns=['a', 'b', 'c'])

        def dated_frame():
            # 100-row frame with float, string and integer columns on a
            # daily date index.
            index = pd.date_range(start='1/1/2018', periods=100)
            return pd.DataFrame(
                {
                    'col1': np.random.rand(100),
                    'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                    'col3': np.arange(100)
                },
                index=index)

        def write_in_three_parts(directory):
            # Write a random 300-row frame as three 100-row csv files and
            # return the frame together with the file paths.
            df = pd.DataFrame(np.random.rand(300, 3), columns=['a', 'b', 'c'])
            file_paths = [
                os.path.join(directory, 'test{}.csv'.format(i))
                for i in range(3)
            ]
            df[:100].to_csv(file_paths[0])
            df[100:200].to_csv(file_paths[1])
            df[200:].to_csv(file_paths[2])
            return df, file_paths

        # test default separator
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            small_frame().to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = mars_read(file_path, index_col=0)
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = mars_read(file_path, index_col=0, chunk_bytes=10)
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test sep
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            small_frame().to_csv(file_path, sep=';')

            pdf = pd.read_csv(file_path, sep=';', index_col=0)
            mdf = mars_read(file_path, sep=';', index_col=0)
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = mars_read(file_path, sep=';', index_col=0, chunk_bytes=10)
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test missing value
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame({
                'c1': [np.nan, 'a', 'b', 'c'],
                'c2': [1, 2, 3, np.nan],
                'c3': [np.nan, np.nan, 3.4, 2.2]
            })
            df.to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = mars_read(file_path, index_col=0)
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = mars_read(file_path, index_col=0, chunk_bytes=12)
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test mixed column types with a date index
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            dated_frame().to_csv(file_path)

            pdf = pd.read_csv(file_path, index_col=0)
            mdf = mars_read(file_path, index_col=0)
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = mars_read(file_path, index_col=0, chunk_bytes=100)
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test compression
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.gzip')
            dated_frame().to_csv(file_path, compression='gzip')

            pdf = pd.read_csv(file_path, compression='gzip', index_col=0)
            mdf = mars_read(file_path, compression='gzip', index_col=0)
            pd.testing.assert_frame_equal(pdf, mdf)

            mdf2 = mars_read(file_path,
                             compression='gzip',
                             index_col=0,
                             chunk_bytes='1k')
            pd.testing.assert_frame_equal(pdf, mdf2)

        # test multiple files
        with tempfile.TemporaryDirectory() as tempdir:
            df, file_paths = write_in_three_parts(tempdir)

            mdf = mars_read(file_paths, index_col=0)
            pd.testing.assert_frame_equal(df, mdf)

            mdf2 = mars_read(file_paths, index_col=0, chunk_bytes=50)
            pd.testing.assert_frame_equal(df, mdf2)

        # test wildcards in path
        with tempfile.TemporaryDirectory() as tempdir:
            df, _ = write_in_three_parts(tempdir)

            # As we can not guarantee the order in which these files are
            # processed, the result may not keep the original order.
            mdf = mars_read('{}/*.csv'.format(tempdir), index_col=0)
            pd.testing.assert_frame_equal(df, mdf.sort_index())

            mdf2 = mars_read('{}/*.csv'.format(tempdir),
                             index_col=0,
                             chunk_bytes=50)
            pd.testing.assert_frame_equal(df, mdf2.sort_index())

    @require_cudf
    def testReadCSVGPUExecution(self):
        """Compare ``md.read_csv(..., gpu=True)`` with ``pd.read_csv``.

        The Mars result is converted with ``to_pandas()`` and both sides have
        their index reset before comparison.  The fixture lives in a
        ``tempfile.TemporaryDirectory`` so it is removed when the block
        exits, including when an assertion fails.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, 'test.csv')
            df = pd.DataFrame({
                'col1': np.random.rand(100),
                'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                'col3': np.arange(100)
            })
            df.to_csv(file_path, index=False)

            pdf = pd.read_csv(file_path)

            mdf = self.executor.execute_dataframe(
                md.read_csv(file_path, gpu=True), concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf.to_pandas().reset_index(drop=True))

            # same read again, this time with an explicit chunk_bytes
            mdf2 = self.executor.execute_dataframe(
                md.read_csv(file_path, gpu=True, chunk_bytes=200),
                concat=True)[0]
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True),
                mdf2.to_pandas().reset_index(drop=True))
Ejemplo n.º 27
0
class Test(unittest.TestCase):
    def setUp(self):
        """Create the ``ExecutorForTest('numpy')`` instance used by the tests."""
        executor = ExecutorForTest('numpy')
        self.executor = executor

    def testQRExecution(self):
        """Check that ``q.dot(r)`` from ``qr`` reproduces the input matrix.

        An 18x6 matrix is factorised with the default arguments and a 6x18
        matrix with ``method='sfqr'``, each under three chunkings.
        """

        def check_reconstruction(data, chunk_size, **qr_kwargs):
            # Factorise ``data`` with the given chunking and compare the
            # product of the factors with the original matrix.
            q, r = qr(tensor(data, chunk_size=chunk_size), **qr_kwargs)
            product = self.executor.execute_tensor(q.dot(r), concat=True)[0]
            self.assertTrue(np.allclose(product, data))

        tall = np.random.randn(18, 6)
        check_reconstruction(tall, (3, 6))
        check_reconstruction(tall, (9, 6))
        check_reconstruction(tall, 3)

        # test for Short-and-Fat QR
        wide = np.random.randn(6, 18)
        check_reconstruction(wide, (6, 9), method='sfqr')
        check_reconstruction(wide, (3, 3), method='sfqr')
        check_reconstruction(wide, (6, 3), method='sfqr')

    def testSVDExecution(self):
        """Check ``svd`` by reconstruction and against ``np.linalg.svd``.

        Complex 18x6 and 6x18 matrices are decomposed under several chunkings
        and ``U.dot(diag(s).dot(V))`` must reproduce the input.  For an
        all-ones matrix the singular values are compared with NumPy's.
        """

        def check_reconstruction(data, **tensor_kwargs):
            # Decompose ``data`` and compare U * diag(s) * V with the input.
            U, s, V = svd(tensor(data, **tensor_kwargs))
            t = U.dot(diag(s).dot(V))
            res = self.executor.execute_tensor(t, concat=True)[0]
            self.assertTrue(np.allclose(res, data))

        data = np.random.randn(18, 6) + 1j * np.random.randn(18, 6)
        check_reconstruction(data, chunk_size=(9, 6))
        check_reconstruction(data, chunk_size=(18, 6))
        check_reconstruction(data, chunk_size=(2, 6))

        data = np.random.randn(6, 18) + 1j * np.random.randn(6, 18)
        check_reconstruction(data)

        # test for matrix of ones
        data = np.ones((20, 10))

        a = tensor(data, chunk_size=10)
        s = svd(a)[1]
        res = self.executor.execute_tensor(s, concat=True)[0]
        # The reference must come from the raw NumPy array; computing it from
        # the Mars tensor ``a`` would make it depend on how NumPy converts or
        # dispatches the very object under test.
        expected = np.linalg.svd(data)[1]
        np.testing.assert_array_almost_equal(res, expected)

    def testRandomizedSVDExecution(self):
        """Compare ``randomized_svd`` with ``np.linalg.svd`` on a low-rank matrix.

        For each input dtype and each power-iteration normalizer the test
        checks the output dtypes and shapes, the leading ``k`` singular
        values, and the product of the returned singular-vector factors.
        """
        n_samples, n_features = 100, 500
        rank, k = 5, 10

        def fetch(t):
            # Execute a tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        for dtype in (np.int32, np.int64, np.float32, np.float64):
            # generate a matrix X of approximate effective rank `rank` and no
            # noise component (very structured signal):
            X = make_low_rank_matrix(n_samples=n_samples,
                                     n_features=n_features,
                                     effective_rank=rank,
                                     tail_strength=0.0,
                                     random_state=0).astype(dtype, copy=False)
            self.assertEqual(X.shape, (n_samples, n_features))

            dtype = np.dtype(dtype)
            if dtype == np.float32:
                decimal = 5
            else:
                decimal = 7

            # If the input dtype is float, then the output dtype is float of
            # the same bit size (f32 is not upcast to f64)
            # But if the input dtype is int, the output dtype is float64
            expected_dtype = dtype if dtype.kind == 'f' else np.float64

            # compute the singular values of X using the slow exact method,
            # then convert the reference factors to the specific dtype
            U, s, V = (part.astype(dtype, copy=False)
                       for part in np.linalg.svd(fetch(X),
                                                 full_matrices=False))
            exact_product = np.dot(U[:, :k], V[:k, :])

            # 'none' would not be stable
            for normalizer in ['auto', 'LU', 'QR']:
                # compute the singular values of X using the fast approximate
                # method
                Ua, sa, Va = randomized_svd(
                    X,
                    k,
                    power_iteration_normalizer=normalizer,
                    random_state=0)

                for approx in (Ua, sa, Va):
                    self.assertEqual(approx.dtype, expected_dtype)

                self.assertEqual(Ua.shape, (n_samples, k))
                self.assertEqual(sa.shape, (k, ))
                self.assertEqual(Va.shape, (k, n_features))

                # ensure that the singular values of both methods are equal
                # up to the real rank of the matrix
                np.testing.assert_almost_equal(s[:k],
                                               fetch(sa),
                                               decimal=decimal)

                # check the singular vectors too (while not checking the sign)
                np.testing.assert_almost_equal(exact_product,
                                               fetch(dot(Ua, Va)),
                                               decimal=decimal)

    def testCholeskyExecution(self):
        """Check ``cholesky`` (upper and ``lower=True``) under several chunkings.

        The input is ``data.dot(data.T)`` for a random integer matrix.  The
        factors must multiply back to the input, and for some chunkings the
        upper factor is also checked to be upper triangular.
        """
        data = np.random.randint(1, 10, (10, 10))
        symmetric_data = data.dot(data.T)

        def fetch(t):
            # Execute a tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def check_lower_times_upper(chunk_size, verify_upper):
            # L.dot(U) must reproduce the input; optionally also verify that
            # the upper factor has nothing below its diagonal.
            a = tensor(symmetric_data, chunk_size=chunk_size)
            lower = cholesky(a, lower=True)
            upper = cholesky(a)
            product = lower.dot(upper)

            if verify_upper:
                upper_res = fetch(upper)
                np.testing.assert_allclose(np.triu(upper_res), upper_res)

            np.testing.assert_allclose(fetch(product), symmetric_data)

        a = tensor(symmetric_data, chunk_size=5)

        # upper factor only: U^T.dot(U) must give back the input
        upper = cholesky(a)
        product = upper.T.dot(upper)

        upper_res = fetch(upper)
        np.testing.assert_allclose(np.triu(upper_res), upper_res)
        self.assertTrue(np.allclose(fetch(product), symmetric_data))

        # lower and upper factors from the same tensor
        lower = cholesky(a, lower=True)
        upper = cholesky(a)
        product = lower.dot(upper)
        self.assertTrue(np.allclose(fetch(product), symmetric_data))

        check_lower_times_upper(2, verify_upper=False)
        check_lower_times_upper((1, 2), verify_upper=False)
        check_lower_times_upper(4, verify_upper=True)
        check_lower_times_upper(3, verify_upper=True)

    def testLUExecution(self):
        """Check ``lu`` on square, tall, wide and sparse inputs.

        For every input and chunking the test verifies that ``L`` is lower
        triangular, ``U`` is upper triangular and ``P.dot(L).dot(U)``
        reproduces the input.  Sparse inputs must additionally yield
        ``SparseNDArray`` factors.
        """
        np.random.seed(1)

        def fetch(t):
            # Execute a tensor and return its concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def check_dense(data, fetch_together=False, **tensor_kwargs):
            perm, lower, upper = lu(tensor(data, **tensor_kwargs))

            # check lower and upper triangular matrix; ``fetch_together``
            # obtains both factors through a single execute_tensors call
            if fetch_together:
                result_l, result_u = self.executor.execute_tensors(
                    [lower, upper])
            else:
                result_l = fetch(lower)
                result_u = fetch(upper)
            np.testing.assert_allclose(np.tril(result_l), result_l)
            np.testing.assert_allclose(np.triu(result_u), result_u)

            np.testing.assert_allclose(fetch(perm.dot(lower).dot(upper)),
                                       data)

        def check_sparse(data, **tensor_kwargs):
            perm, lower, upper = lu(tensor(data, **tensor_kwargs))
            result_l = fetch(lower)
            result_u = fetch(upper)

            # check lower and upper triangular matrix
            np.testing.assert_allclose(np.tril(result_l), result_l)
            np.testing.assert_allclose(np.triu(result_u), result_u)
            self.assertIsInstance(result_l, SparseNDArray)
            self.assertIsInstance(result_u, SparseNDArray)

            np.testing.assert_array_almost_equal(
                data.A, fetch(perm.dot(lower).dot(upper)))

        # square matrix
        data = np.random.randint(1, 10, (6, 6))
        check_dense(data)
        check_dense(data, chunk_size=2)
        check_dense(data, chunk_size=(2, 3))
        check_dense(data, chunk_size=4)

        # shape[0] > shape[1]
        data = np.random.randint(1, 10, (10, 6))
        check_dense(data)
        check_dense(data, chunk_size=2)
        check_dense(data, chunk_size=(2, 3))
        check_dense(data, fetch_together=True, chunk_size=4)

        # shape[0] < shape[1]
        data = np.random.randint(1, 10, (6, 10))
        check_dense(data)
        check_dense(data, chunk_size=2)
        check_dense(data, chunk_size=(2, 3))
        check_dense(data, fetch_together=True, chunk_size=4)

        # test for sparse
        data = sps.csr_matrix([[2, 0, 0, 0, 5, 2], [0, 6, 1, 0, 0, 6],
                               [8, 0, 9, 0, 0, 2], [0, 6, 0, 8, 7, 3],
                               [7, 0, 6, 1, 7, 0], [0, 0, 0, 7, 0, 8]])
        check_sparse(data)
        check_sparse(data, chunk_size=2)
        check_sparse(data, chunk_size=(2, 3))
        check_sparse(data, chunk_size=4)

    def testSolveTriangular(self):
        """Run ``solve_triangular`` on dense and sparse inputs at several chunk sizes.

        Dense cases are verified by multiplying the matching triangle of ``A``
        (``triu`` for the default call, ``tril`` for ``lower=True``) with the
        solution and comparing against the right-hand side.  Sparse cases also
        assert that the executed solution is a ``SparseNDArray``.
        """
        from mars.tensor import tril, triu
        np.random.seed(1)

        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def check_dense(mat, rhs, chunk_size):
            # Solve against both triangles of `mat` using one chunk layout.
            A = tensor(mat, chunk_size=chunk_size)
            b = tensor(rhs, chunk_size=chunk_size)

            upper_x = solve_triangular(A, b)
            np.testing.assert_allclose(run(triu(A).dot(upper_x)), rhs)

            lower_x = solve_triangular(A, b, lower=True)
            np.testing.assert_allclose(run(tril(A).dot(lower_x)), rhs)

        # vector right-hand side: single chunk, then 2x2 chunks
        mat = np.random.randint(1, 10, (20, 20))
        rhs = np.random.randint(1, 10, (20, ))
        for chunk_size in (20, 10):
            check_dense(mat, rhs, chunk_size)

        # matrix right-hand side: single chunk, then uneven chunks
        mat = np.random.randint(1, 10, (10, 10))
        rhs = np.random.randint(1, 10, (10, 5))
        for chunk_size in (10, 3):
            check_dense(mat, rhs, chunk_size)

        # test sparse
        for rhs_shape in ((10, ), (10, 2)):
            sp_mat = sps.csr_matrix(
                np.triu(np.random.randint(1, 10, (10, 10))))
            rhs = np.random.random(rhs_shape)

            A = tensor(sp_mat, chunk_size=5)
            b = tensor(rhs, chunk_size=5)

            solved = run(solve_triangular(A, b))
            recovered = sp_mat.dot(solved)

            self.assertIsInstance(solved, SparseNDArray)
            np.testing.assert_allclose(recovered, rhs)

    def testSolve(self):
        """Run ``solve`` on dense and sparse systems with varied chunk layouts.

        Dense results are compared with ``scipy.linalg.solve`` and also
        verified by checking that ``A.dot(x)`` reproduces the right-hand side.
        Sparse results are verified through the product only, plus a
        ``SparseNDArray`` type check where the original test made one.
        """
        import scipy.linalg
        np.random.seed(1)

        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def check_dense(mat, rhs, a_chunks, b_chunks):
            A = tensor(mat, chunk_size=a_chunks)
            b = tensor(rhs, chunk_size=b_chunks)

            x = solve(A, b)

            np.testing.assert_allclose(run(x), scipy.linalg.solve(mat, rhs))
            np.testing.assert_allclose(run(A.dot(x)), rhs)

        dense = np.random.randint(1, 10, (20, 20))

        # vector, narrow-matrix and square right-hand sides on square chunks
        for rhs_shape in ((20, ), (20, 5), (20, 20)):
            rhs = np.random.randint(1, 10, rhs_shape)
            check_dense(dense, rhs, 5, 5)

        # test for not all chunks are square in matrix A
        rhs = np.random.randint(1, 10, (20, ))
        for a_chunks in (6, (7, 6)):
            check_dense(dense, rhs, a_chunks, 6)

        # test sparse
        sp_mat = sps.csr_matrix(np.random.randint(1, 10, (20, 20)))

        def sparse_product(rhs, chunk_size):
            # Build A and b, solve, and return the executed product A.dot(x).
            A = tensor(sp_mat, chunk_size=chunk_size)
            b = tensor(rhs, chunk_size=chunk_size)
            x = solve(A, b)
            return run(A.dot(x))

        rhs = np.random.randint(1, 10, (20, ))
        A = tensor(sp_mat, chunk_size=5)
        b = tensor(rhs, chunk_size=5)

        solution = run(solve(A, b))
        self.assertIsInstance(solution, SparseNDArray)
        np.testing.assert_allclose(sp_mat.dot(solution), rhs)

        for rhs_shape in ((20, 5), (20, 20)):
            rhs = np.random.randint(1, 10, rhs_shape)
            product = sparse_product(rhs, 5)
            self.assertIsInstance(product, SparseNDArray)
            np.testing.assert_allclose(product, rhs)

        # test for not all chunks are square in matrix A
        rhs = np.random.randint(1, 10, (20, ))
        np.testing.assert_allclose(sparse_product(rhs, 6), rhs)

    def testSolveSymPos(self):
        """Run ``solve(..., sym_pos=True)`` on a matrix built as ``L.dot(L.T)``.

        The result is compared with ``scipy.linalg.solve`` and verified by
        checking that ``A.dot(x)`` reproduces the right-hand side.
        """
        import scipy.linalg
        np.random.seed(1)

        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        # Build the coefficient matrix from a lower-triangular factor.
        lower = np.tril(np.random.randint(1, 10, (20, 20)))
        coeffs = lower.dot(lower.T)
        rhs = np.random.randint(1, 10, (20, ))

        A = tensor(coeffs, chunk_size=5)
        b = tensor(rhs, chunk_size=5)

        x = solve(A, b, sym_pos=True)

        np.testing.assert_allclose(run(x), scipy.linalg.solve(coeffs, rhs))
        np.testing.assert_allclose(run(A.dot(x)), rhs)

    def testInv(self):
        """Run ``inv`` on dense and sparse matrices with varied chunk layouts.

        Each inverse is compared with ``scipy.linalg.inv`` and verified by
        checking that the matrix times its inverse is close to the identity.
        Sparse inputs must additionally execute to a ``SparseNDArray``.
        """
        import scipy.linalg
        np.random.seed(1)

        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        def check_inverse(mat, raw, expect_sparse=False):
            # `mat` is the tensor under test, `raw` the equivalent ndarray.
            inverse = inv(mat)

            got = run(inverse)
            if expect_sparse:
                self.assertIsInstance(got, SparseNDArray)
            self.assertTrue(np.allclose(got, scipy.linalg.inv(raw)))

            got = run(mat.dot(inverse))
            self.assertTrue(
                np.allclose(got, np.eye(raw.shape[0], dtype=float)))

        data = np.random.randint(1, 10, (20, 20))

        # default chunking
        check_inverse(tensor(data), data)

        check_inverse(tensor(data, chunk_size=5), data)

        # test 1 chunk
        A = tensor(data, chunk_size=20)
        check_inverse(A, data)

        # inverse of a tensor expression rather than a raw data tensor
        check_inverse(A.T.dot(A), data.T.dot(data))

        # test for not all chunks are square in matrix A
        check_inverse(tensor(data, chunk_size=8), data)

        # test sparse
        data = np.random.randint(1, 10, (20, 20))
        sp_data = sps.csr_matrix(data)

        check_inverse(tensor(sp_data, chunk_size=5), data, expect_sparse=True)

        # test for not all chunks are square in matrix A
        check_inverse(tensor(sp_data, chunk_size=8), data, expect_sparse=True)

    @ignore_warning
    def testNormExecution(self):
        """Compare ``norm`` with ``np.linalg.norm`` over many parameter combinations.

        Every combination of ``ord``, ``axis`` and ``keepdims`` is tried on
        1-d and 2-d inputs; a combination is skipped when a ``ValueError`` is
        raised anywhere in its body.  Further checks cover the default norm,
        a norm inside a larger expression, and a multi-chunk Frobenius norm.
        """
        vec = np.arange(9) - 4
        mat = vec.reshape(3, 3)

        # (reference ndarray, tensor under test)
        cases = [
            (vec, tensor(vec, chunk_size=2)),
            (vec, tensor(vec, chunk_size=9)),
            (mat, tensor(mat, chunk_size=(2, 3))),
            (mat, tensor(mat, chunk_size=3)),
        ]
        norm_orders = (None, 'nuc', np.inf, -np.inf, 0, 1, -1, 2, -2)

        for raw, a in cases:
            for norm_ord in norm_orders:
                for axis in (0, 1, (0, 1)):
                    for keepdims in (True, False):
                        try:
                            expected = np.linalg.norm(
                                raw, ord=norm_ord, axis=axis,
                                keepdims=keepdims)
                            t = norm(a, ord=norm_ord, axis=axis,
                                     keepdims=keepdims)
                            # concat is only requested for non-scalar results
                            res = self.executor.execute_tensor(
                                t, concat=t.ndim > 0)[0]

                            np.testing.assert_allclose(
                                res, expected, atol=.0001)
                        except ValueError:
                            continue

        # default norm of a vector with default chunking
        computed = self.executor.execute_tensor(norm(tensor(vec)))[0]
        reference = np.linalg.norm(vec)
        self.assertEqual(computed, reference)

        # norm used inside a larger expression: fraction of points with
        # row norm below 0.5, scaled by 4, loosely compared with 3.14
        points = uniform(-0.5, 0.5, size=(500, 2), chunk_size=50)
        inside = (norm(points, axis=1) < 0.5).sum().astype(float)
        estimate = inside / 500 * 4
        res = self.executor.execute_tensor(estimate)[0]
        self.assertAlmostEqual(res, 3.14, delta=1)

        # default norm of a multi-chunk matrix
        raw = np.random.RandomState(0).rand(10, 10)
        frob = norm(tensor(raw, chunk_size=5))
        computed = self.executor.execute_tensor(frob, concat=True)[0]
        reference = np.linalg.norm(raw)
        np.testing.assert_allclose(computed, reference)

    def testTensordotExecution(self):
        """Run ``tensordot``, ``dot`` and ``inner`` on dense tensors.

        Covers a mock size-estimation run, per-chunk results without
        concatenation, and concatenated results compared with NumPy.
        """
        mock_executor = ExecutorForTest(
            sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        def check_row_halves(chunks, expected):
            # Two result chunks split along the rows at index 500.
            self.assertEqual(len(chunks), 2)
            self.assertTrue(np.array_equal(chunks[0], expected[:500, :]))
            self.assertTrue(np.array_equal(chunks[1], expected[500:, :]))

        lhs_raw = np.arange(60).reshape(3, 4, 5)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs_raw = np.arange(24).reshape(4, 3, 2)
        rhs = tensor(rhs_raw, chunk_size=2)

        contraction = ([1, 0], [0, 1])
        c = tensordot(lhs, rhs, axes=contraction)

        # both entries of each mock size record must sum to c.nbytes
        sizes = mock_executor.execute_tensor(c, mock=True)
        for position in (0, 1):
            self.assertEqual(sum(s[position] for s in sizes), c.nbytes)

        chunks = self.executor.execute_tensor(c)
        expected = np.tensordot(lhs_raw, rhs_raw, axes=contraction)
        row_bands = (slice(None, 2), slice(2, 4), slice(4, None))
        for idx, band in enumerate(row_bands):
            self.assertTrue(np.array_equal(chunks[idx], expected[band, :]))

        # dot of two multi-chunk matrices
        c = dot(ones((1000, 2000), chunk_size=500),
                ones((2000, 100), chunk_size=500))
        chunks = self.executor.execute_tensor(c)
        expected = np.dot(np.ones((1000, 2000)), np.ones((2000, 100)))
        check_row_halves(chunks, expected)

        # method-style dot with many small chunks
        c = ones((10, 8), chunk_size=2).dot(ones((8, 10), chunk_size=2))
        chunks = self.executor.execute_tensor(c)
        self.assertEqual(len(chunks), 25)
        for piece in chunks:
            self.assertTrue(np.array_equal(piece, np.tile([8], [2, 2])))

        # single-chunk operands
        c = ones((500, 500), chunk_size=500).dot(
            ones((500, 100), chunk_size=500))
        chunks = self.executor.execute_tensor(c)
        self.assertTrue(
            np.array_equal(chunks[0], np.tile([500], [500, 100])))

        # 3-d tensordot with differently chunked operands
        raw_a = np.random.random((100, 200, 50))
        raw_b = np.random.random((200, 10, 100))
        c = tensordot(tensor(raw_a, chunk_size=50),
                      tensor(raw_b, chunk_size=33),
                      axes=((0, 1), (2, 0)))
        merged = self.executor.execute_tensor(c, concat=True)
        expected = np.tensordot(raw_a, raw_b, axes=(c.op.a_axes, c.op.b_axes))
        self.assertTrue(np.allclose(merged[0], expected))

        # inner product
        c = inner(ones((1000, 2000), chunk_size=500),
                  ones((100, 2000), chunk_size=500))
        chunks = self.executor.execute_tensor(c)
        expected = np.inner(np.ones((1000, 2000)), np.ones((100, 2000)))
        check_row_halves(chunks, expected)

        # chunk size that does not divide the shape evenly
        c = ones((100, 100), chunk_size=30).dot(
            ones((100, 100), chunk_size=30))
        merged = self.executor.execute_tensor(c, concat=True)[0]
        np.testing.assert_array_equal(merged, np.ones((100, 100)) * 100)

    def testSparseDotSizeExecution(self):
        """Check the size estimates recorded for ``TensorTensorDot`` chunks.

        A recording size estimator is registered temporarily for
        ``TensorTensorDot``.  It delegates to the stock ``estimate_size`` and
        stores, per output chunk key, the estimate found in ``ctx``, the
        chunk's ``nbytes`` and the summed input sizes / ``nbytes``.
        """
        from mars.tensor.linalg.tensordot import TensorTensorDot
        from mars.executor import register, register_default

        # Recorded per output chunk key; reset at the start of every run.
        est_by_chunk = {}
        nbytes_by_chunk = {}
        input_est_by_chunk = {}
        input_nbytes_by_chunk = {}
        recorders = (est_by_chunk, nbytes_by_chunk,
                     input_est_by_chunk, input_nbytes_by_chunk)

        def _recording_estimator(ctx, op):
            TensorTensorDot.estimate_size(ctx, op)

            out = op.outputs[0]
            est_by_chunk[out.key] = ctx[out.key]
            nbytes_by_chunk[out.key] = out.nbytes

            # Keyed by op key, so inputs sharing an op are counted once.
            est_per_op = {inp.op.key: ctx[inp.key][0] for inp in op.inputs}
            input_est_by_chunk[out.key] = sum(est_per_op.values())
            nbytes_per_op = {inp.op.key: inp.nbytes for inp in op.inputs}
            input_nbytes_by_chunk[out.key] = sum(nbytes_per_op.values())

        def execute_size(t):
            size_executor = ExecutorForTest(
                sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
            try:
                for store in recorders:
                    store.clear()
                register(TensorTensorDot,
                         size_estimator=_recording_estimator)
                size_executor.execute_tensor(t, mock=True)
            finally:
                # always restore the default handlers for later tests
                register_default(TensorTensorDot)

        a = tensor(sps.random(5, 9, density=.1), chunk_size=2)
        b = tensor(sps.random(9, 10, density=.2), chunk_size=3)

        # default dot of two sparse tensors
        execute_size(dot(a, b))

        for key, input_total in input_est_by_chunk.items():
            self.assertGreaterEqual(est_by_chunk[key][1], input_total)

        # dense output requested
        execute_size(dot(a, b, sparse=False))

        for key in input_est_by_chunk:
            self.assertEqual(est_by_chunk[key][0], nbytes_by_chunk[key])
            self.assertEqual(
                est_by_chunk[key][1],
                input_nbytes_by_chunk[key] + nbytes_by_chunk[key])

    def testSparseDotExecution(self):
        """Run ``dot``, ``tensordot`` and ``inner`` on sparse tensors.

        Each result is compared with the SciPy product of the raw matrices,
        and its sparsity is checked with ``issparse``.  A final case takes
        the inner product of two sparse vectors and expects a scalar.
        """
        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        lhs_raw = sps.random(5, 9, density=.1)
        rhs_raw = sps.random(9, 10, density=.2)
        a = tensor(lhs_raw, chunk_size=2)
        b = tensor(rhs_raw, chunk_size=3)
        reference = lhs_raw.dot(rhs_raw).toarray()

        out = run(dot(a, b))
        self.assertTrue(issparse(out))
        np.testing.assert_allclose(out.toarray(), reference)

        out = run(dot(a, b, sparse=False))
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, reference)

        # contracting the last axes of a and b.T
        out = run(tensordot(a, b.T, (-1, -1), sparse=False))
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, reference)

        out = run(inner(a, b.T))
        self.assertTrue(issparse(out))
        np.testing.assert_allclose(out.toarray(), reference)

        out = run(inner(a, b.T, sparse=False))
        self.assertFalse(issparse(out))
        np.testing.assert_allclose(out, reference)

        # test vector inner
        lhs_raw = np.random.rand(5)
        rhs_raw = np.random.rand(5)
        a = tensor(lhs_raw, chunk_size=2).tosparse()
        b = tensor(rhs_raw, chunk_size=2).tosparse()

        out = run(inner(a, b))
        self.assertTrue(np.isscalar(out))
        np.testing.assert_allclose(out, np.inner(lhs_raw, rhs_raw))

    def testVdotExecution(self):
        """Compare ``vdot`` with ``np.vdot`` for complex vectors and 2-d integers."""
        operand_pairs = (
            (np.array([1 + 2j, 3 + 4j]), np.array([5 + 6j, 7 + 8j])),
            (np.array([[1, 4], [5, 6]]), np.array([[4, 1], [2, 2]])),
        )

        for lhs_raw, rhs_raw in operand_pairs:
            # one element per chunk
            lhs = tensor(lhs_raw, chunk_size=1)
            rhs = tensor(rhs_raw, chunk_size=1)

            product = vdot(lhs, rhs)

            got = self.executor.execute_tensor(product)[0]
            want = np.vdot(lhs_raw, rhs_raw)
            np.testing.assert_equal(got, want)

    def testMatmulExecution(self):
        """Compare ``matmul`` with ``np.matmul`` across shapes, sparsity and order.

        Covers matrix-vector and vector-matrix products, broadcast 4-d
        operands, reshaped ``arange`` tensors, sparse operands, and
        Fortran-ordered inputs with the default, ``'A'`` and ``'C'`` orders.
        """
        def run(t):
            # Execute a tensor and return its single concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        # matrix @ vector
        mat = np.random.randn(10, 20)
        vec = np.random.randn(20)
        got = run(matmul(tensor(mat, chunk_size=2),
                         tensor(vec, chunk_size=3)))
        np.testing.assert_allclose(got, np.matmul(mat, vec))

        # vector @ matrix
        mat = np.random.randn(10, 20)
        vec = np.random.randn(10)
        a = tensor(mat, chunk_size=2)
        b = tensor(vec, chunk_size=3)
        got = run(matmul(b, a))
        np.testing.assert_allclose(got, np.matmul(vec, mat))

        # 4-d operands with size-1 leading dimensions
        lhs_raw = np.random.randn(15, 1, 20, 30)
        rhs_raw = np.random.randn(1, 11, 30, 20)
        got = run(matmul(tensor(lhs_raw, chunk_size=12),
                         tensor(rhs_raw, chunk_size=13)))
        np.testing.assert_allclose(
            got, np.matmul(lhs_raw, rhs_raw), atol=.0001)

        # operands produced by reshape
        a = arange(2 * 2 * 4, chunk_size=1).reshape((2, 2, 4))
        b = arange(2 * 2 * 4, chunk_size=1).reshape((2, 4, 2))
        got = run(matmul(a, b))
        want = np.matmul(
            np.arange(2 * 2 * 4).reshape(2, 2, 4),
            np.arange(2 * 2 * 4).reshape(2, 4, 2))
        np.testing.assert_allclose(got, want, atol=.0001)

        # sparse operands
        lhs_raw = sps.random(10, 20)
        rhs_raw = sps.random(20, 5)
        got = run(matmul(tensor(lhs_raw, chunk_size=2),
                         tensor(rhs_raw, chunk_size=3)))
        want = np.matmul(lhs_raw.toarray(), rhs_raw.toarray())
        np.testing.assert_allclose(got.toarray(), want)

        # test order
        lhs_raw = np.asfortranarray(np.random.randn(10, 20))
        rhs_raw = np.asfortranarray(np.random.randn(20, 30))

        a = tensor(lhs_raw, chunk_size=12)
        b = tensor(rhs_raw, chunk_size=13)

        # default order first, then explicit 'A' and 'C'
        for order_kwargs in ({}, {'order': 'A'}, {'order': 'C'}):
            got = run(matmul(a, b, **order_kwargs))
            want = np.matmul(lhs_raw, rhs_raw, **order_kwargs)

            np.testing.assert_allclose(got, want)
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertEqual(got.flags[flag], want.flags[flag])
Ejemplo n.º 28
0
class Test(unittest.TestCase):
    """Execution tests for tensor ``reshape``, with and without the shuffle path."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testReshapeExecution(self):
        """Check reshape values and memory layout for C- and F-ordered cases."""
        x = ones((1, 2, 3), chunk_size=[4, 3, 5])
        y = x.reshape(3, 2)
        res = self.executor.execute_tensor(y)[0]
        self.assertEqual(y.shape, (3, 2))
        np.testing.assert_equal(res, np.ones((3, 2)))

        # C-ordered data reshaped with order='F'
        data = np.random.rand(6, 4)
        x2 = tensor(data, chunk_size=2)
        y2 = x2.reshape(3, 8, order='F')
        res = self.executor.execute_tensor(y2, concat=True)[0]
        expected = data.reshape((3, 8), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        # F-ordered data reshaped with the default order
        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(3, 8)
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((3, 8))
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])

        # F-ordered data reshaped with order='F'
        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(3, 8, order='F')
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((3, 8), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

    def testShuffleReshapeExecution(self):
        """Repeat the reshape checks with ``_reshape_with_shuffle`` set on each op."""
        a = ones((31, 27), chunk_size=10)
        b = a.reshape(27, 31)
        b.op.extra_params['_reshape_with_shuffle'] = True

        res = self.executor.execute_tensor(b, concat=True)[0]
        np.testing.assert_array_equal(res, np.ones((27, 31)))

        b2 = a.reshape(27, 31, order='F')
        # Set the flag on b2 itself; the original set it on `b` a second
        # time, so the order='F' case never had the shuffle flag set.
        b2.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(b2)[0]
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        # C-ordered data reshaped with order='F'
        data = np.random.rand(6, 4)
        x2 = tensor(data, chunk_size=2)
        y2 = x2.reshape(4, 6, order='F')
        y2.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(y2, concat=True)[0]
        expected = data.reshape((4, 6), order='F')
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['F_CONTIGUOUS'])
        self.assertFalse(res.flags['C_CONTIGUOUS'])

        # F-ordered data reshaped with the default order
        data2 = np.asfortranarray(np.random.rand(6, 4))
        x3 = tensor(data2, chunk_size=2)
        y3 = x3.reshape(4, 6)
        y3.op.extra_params['_reshape_with_shuffle'] = True
        res = self.executor.execute_tensor(y3, concat=True)[0]
        expected = data2.reshape((4, 6))
        np.testing.assert_array_equal(res, expected)
        self.assertTrue(res.flags['C_CONTIGUOUS'])
        self.assertFalse(res.flags['F_CONTIGUOUS'])
Ejemplo n.º 29
0
    def testStoreHDF5Execution(self):
        """Store a tensor with ``tohdf5`` via a filename, a file and a dataset.

        After each store the dataset is read back with ``h5py`` and compared
        with the raw array.  Invalid targets must raise: a non-path object
        raises ``TypeError``; a filename or file without group/dataset
        raises ``ValueError``.
        """
        raw = np.random.RandomState(0).rand(10, 20)

        group_name = 'test_group'
        dataset_name = 'test_dataset'
        dataset_path = '{}/{}'.format(group_name, dataset_name)

        single_chunk = tensor(raw, chunk_size=20)
        multi_chunk = tensor(raw, chunk_size=9)

        with self.assertRaises(TypeError):
            tohdf5(object(), multi_chunk)

        outer_executor = self.executor

        class MockSession:
            # Minimal session stand-in exposing only the test's executor.
            def __init__(self):
                self.executor = outer_executor

        ctx = LocalContext(MockSession())
        executor = ExecutorForTest('numpy', storage=ctx)
        with ctx, tempfile.TemporaryDirectory() as d:
            filename = os.path.join(
                d, 'test_store_{}.hdf5'.format(int(time.time())))

            def assert_stored():
                # Read the dataset back and compare it with the raw array.
                with h5py.File(filename, 'r') as f:
                    result = np.asarray(f[dataset_path])
                    np.testing.assert_array_equal(result, raw)

            # test 1 chunk
            r = tohdf5(filename, single_chunk,
                       group=group_name, dataset=dataset_name)
            executor.execute_tensor(r)
            assert_stored()

            # test filename
            r = tohdf5(filename, multi_chunk,
                       group=group_name, dataset=dataset_name)
            executor.execute_tensor(r)

            # second input of the first tiled chunk: a SuccessorsExclusive
            # op with no inputs of its own
            tiled = get_tiled(r)
            second_input = tiled.chunks[0].inputs[1]
            self.assertEqual(type(second_input.op).__name__,
                             'SuccessorsExclusive')
            self.assertEqual(len(second_input.inputs), 0)

            assert_stored()

            # a filename without group/dataset is rejected
            with self.assertRaises(ValueError):
                tohdf5(filename, multi_chunk)

            with h5py.File(filename, 'r') as f:
                # test file
                r = tohdf5(f, multi_chunk,
                           group=group_name, dataset=dataset_name)

            executor.execute_tensor(r)
            assert_stored()

            # a file object without group/dataset is rejected
            with self.assertRaises(ValueError), \
                    h5py.File(filename, 'r') as f:
                tohdf5(f, multi_chunk)

            with h5py.File(filename, 'r') as f:
                # test dataset
                ds = f[dataset_path]
                r = tohdf5(ds, multi_chunk)

            executor.execute_tensor(r)
            assert_stored()
Ejemplo n.º 30
0
class Test(TestBase):
    """Execution tests that compare the tensor ``einsum`` with ``np.einsum``."""

    def setUp(self):
        """Create the executor that runs every tensor built by these tests."""
        self.executor = ExecutorForTest('numpy')

    def _assert_matches_numpy(self, subscripts, tensors, arrays, **kwargs):
        """Execute ``einsum`` on ``tensors`` and compare it with NumPy.

        ``subscripts`` and ``kwargs`` are forwarded unchanged to both the
        tensor ``einsum`` and ``np.einsum``; ``arrays`` are the raw ndarrays
        that ``tensors`` were built from, in the same order.
        """
        lazy = einsum(subscripts, *tensors, **kwargs)
        actual = self.executor.execute_tensor(lazy, concat=True)[0]
        reference = np.einsum(subscripts, *arrays, **kwargs)
        np.testing.assert_almost_equal(actual, reference)

    def testEinsumExecution(self):
        """Check einsum results for contraction, reduction and broadcasting."""
        # Two 3-d operands chunked differently along every axis.
        lhs_raw = np.random.rand(3, 4, 5)
        rhs_raw = np.random.rand(4, 3, 2)
        lhs = tensor(lhs_raw, chunk_size=2)
        rhs = tensor(rhs_raw, chunk_size=3)

        contraction_cases = [
            # explicit output subscripts
            ('ijk, jil -> kl', {}),
            # implicit output: contracts the shared indices i and j (dot-like)
            ('ijk, jil', {'optimize': True}),
        ]
        for subscripts, options in contraction_cases:
            self._assert_matches_numpy(
                subscripts, (lhs, rhs), (lhs_raw, rhs_raw), **options)

        # Ellipsis-only subscripts: element-wise multiply(a, b).
        square_a_raw = np.random.rand(6, 6)
        square_b_raw = np.random.rand(6, 6)
        square_a = tensor(square_a_raw, chunk_size=3)
        square_b = tensor(square_b_raw, chunk_size=3)
        self._assert_matches_numpy(
            '..., ...', (square_a, square_b), (square_a_raw, square_b_raw),
            order='C')

        # 'i->' reduces a chunked vector to a scalar, like sum(vec, axis=-1).
        vec_raw = np.random.rand(10)
        self._assert_matches_numpy(
            'i->', (tensor(vec_raw, chunk_size=3),), (vec_raw,), order='F')

        # '...i->...' sums over the last axis, like sum(vec, axis=-1); here the
        # tensor is created without an explicit chunk_size.
        self._assert_matches_numpy(
            '...i->...', (tensor(vec_raw),), (vec_raw,))

        # Broadcasting: the length-1 'a' axis of the first operand is
        # broadcast against the length-8 last operand; chunks are uneven.
        bcast_raw = (
            np.random.rand(1, 10, 9),
            np.random.rand(9, 6),
            np.random.rand(10, 6),
            np.random.rand(8),
        )
        bcast_chunks = (
            (1, (5, 5), (3, 3, 3)),
            ((3, 3, 3), (3, 3)),
            ((6, 4), (4, 2)),
            4,
        )
        bcast_tensors = tuple(
            tensor(raw, chunk_size=chunks)
            for raw, chunks in zip(bcast_raw, bcast_chunks)
        )
        # The same expression is checked under both contraction-path strategies.
        for strategy in ('optimal', 'greedy'):
            self._assert_matches_numpy(
                'ajk,kl,jl,a->a', bcast_tensors, bcast_raw, optimize=strategy)