Exemple #1
0
    def testBoolIndexingTiles(self):
        """Tile boolean-mask indexing with a same-rank mask and a lower-rank mask."""
        source = ones((100, 200, 300), chunk_size=30)

        # mask built from the indexed tensor itself (same shape)
        by_full_mask = source[source < 2].tiles()
        source = get_tiled(source)

        self.assertEqual(len(by_full_mask.chunks), 280)
        self.assertEqual(by_full_mask.chunks[0].index, (0, ))
        picked = by_full_mask.chunks[20]
        self.assertEqual(picked.index, (20, ))
        # each output chunk consumes the matching data chunk and mask chunk
        self.assertIs(picked.inputs[0], source.cix[(0, 2, 0)].data)
        self.assertIs(picked.inputs[1],
                      by_full_mask.op.indexes[0].cix[0, 2, 0].data)

        # mask built from a 2-d tensor, covering only the leading axes
        mask_base = ones((100, 200), chunk_size=30)
        by_partial_mask = source[mask_base < 2].tiles()
        source = get_tiled(source)

        self.assertEqual(len(by_partial_mask.chunks), 280)
        head = by_partial_mask.chunks[0]
        self.assertEqual(len(head.shape), 2)
        # the masked axis has an unknown length until execution
        self.assertTrue(np.isnan(head.shape[0]))
        self.assertEqual(head.shape[1], 30)
        picked = by_partial_mask.chunks[20]
        self.assertEqual(picked.inputs[0], source.cix[(0, 2, 0)].data)
        self.assertEqual(picked.inputs[1],
                         by_partial_mask.op.indexes[0].cix[0, 2].data)
Exemple #2
0
    def testUnifyChunkAdd(self):
        """Adding a 2-chunk tensor and a 1-chunk tensor reuses the single chunk."""
        lhs = ones(4, chunk_size=2)
        rhs = ones(1, chunk_size=1)

        total = (lhs + rhs).tiles()
        lhs, rhs = get_tiled(lhs), get_tiled(rhs)

        self.assertEqual(len(total.chunks), 2)
        for position in range(2):
            out_chunk = total.chunks[position]
            # left input follows the output position, right input is always chunk 0
            self.assertEqual(out_chunk.inputs[0], lhs.chunks[position].data)
            self.assertEqual(out_chunk.inputs[1], rhs.chunks[0].data)
Exemple #3
0
    def testAddWithOut(self):
        """Check ``add`` with the ``out`` argument before and after tiling.

        Covers the rewrite of the ``out`` tensor, the tiled chunk ops, the
        errors raised for unusable ``out`` values, and a float ``out`` fed by
        integer operands.
        """
        t1 = ones((3, 4), chunk_size=2)
        t2 = ones(4, chunk_size=2)

        t3 = add(t1, t2, out=t1)

        # ``t1`` itself now carries the add op, and the call returns that same object
        self.assertIsInstance(t1.op, TensorAdd)
        self.assertEqual(t1.op.out.key, t1.op.lhs.key)
        self.assertIs(t3, t1)
        self.assertEqual(t3.shape, (3, 4))
        self.assertEqual(t3.op.lhs.extra_params.raw_chunk_size, 2)
        self.assertIs(t3.op.rhs, t2.data)
        # the result key differs from the key of its left-hand input
        self.assertNotEqual(t3.key, t3.op.lhs.key)

        t3.tiles()
        t1 = get_tiled(t1)

        # the same out/lhs relationship must hold at chunk level
        self.assertIsInstance(t1.chunks[0].op, TensorAdd)
        self.assertEqual(t1.chunks[0].op.out.key, t1.chunks[0].op.lhs.key)

        # ``out`` is not a tensor
        with self.assertRaises(TypeError):
            add(t1, t2, out=1)

        # ``out`` is shape (4,), while the operands involve a (3, 4) tensor
        with self.assertRaises(ValueError):
            add(t1, t2, out=t2)

        # ``out`` is an integer tensor, the op is a true division
        with self.assertRaises(TypeError):
            truediv(t1, t2, out=t1.astype('i8'))

        t1 = ones((3, 4), chunk_size=2, dtype=float)
        t2 = ones(4, chunk_size=2, dtype=int)

        # integer operands written into a float ``out``: result takes out's shape and dtype
        t3 = add(t2, 1, out=t1)
        self.assertEqual(t3.shape, (3, 4))
        self.assertEqual(t3.dtype, np.float64)
Exemple #4
0
    def testAggregateResult(self):
        """Compare ``NDArrayIndexesHandler.aggregate_result`` with the concatenated result.

        Runs once with an ascending fancy index and once with a fancy index
        that is not in ascending order.
        """
        rs = np.random.RandomState(0)
        raw = rs.rand(10, 10)
        t = tensor(raw, chunk_size=6)

        slc = slice(None, None, 3)

        # test no reorder
        fancy_index = np.array([3, 6, 7])
        indexes = [slc, fancy_index]
        result = t[indexes].tiles()

        handler = NDArrayIndexesHandler()

        context = handler.handle(result.op, return_context=True)
        self.assertGreater(context.op.outputs[0].chunk_shape[-1], 1)
        # pair every per-chunk result with the index of the chunk it came from
        chunk_results = self.executor.execute_tensor(result)
        chunk_results = \
            [(c.index, r) for c, r in zip(get_tiled(result).chunks, chunk_results)]
        expected = self.executor.execute_tensor(result, concat=True)[0]
        res = handler.aggregate_result(context, chunk_results)
        np.testing.assert_array_equal(res, expected)

        # test fancy index that requires reordering
        fancy_index = np.array([6, 7, 3])
        indexes = [slc, fancy_index]
        test = t[indexes].tiles()

        context = handler.handle(test.op, return_context=True)
        self.assertEqual(context.op.outputs[0].chunk_shape[-1], 1)
        # NOTE(review): ``chunk_results`` still holds the per-chunk results of the
        # first (ascending) selection. Presumably intentional, with the handler
        # reordering them through ``context`` - confirm against aggregate_result.
        res = handler.aggregate_result(context, chunk_results)
        expected = self.executor.execute_tensor(test, concat=True)[0]
        np.testing.assert_array_equal(res, expected)
Exemple #5
0
    def testToCPU(self):
        """Round-trip a DataFrame through ``to_gpu``/``to_cpu`` and compare metadata."""
        values = np.random.rand(10, 10)
        row_labels = np.random.randint(-100, 100, size=(10, ))
        column_labels = [np.random.bytes(10) for _ in range(10)]
        raw = pd.DataFrame(values, index=row_labels, columns=column_labels)

        source = from_pandas_df(raw)
        restored = to_cpu(to_gpu(source))

        # tileable-level metadata survives the round trip
        self.assertEqual(source.index_value, restored.index_value)
        self.assertEqual(source.columns_value, restored.columns_value)
        self.assertFalse(restored.op.gpu)
        pd.testing.assert_series_equal(source.dtypes, restored.dtypes)

        restored = restored.tiles()
        source = get_tiled(source)

        # chunk-level metadata survives as well
        self.assertEqual(source.nsplits, restored.nsplits)
        source_chunk = source.chunks[0]
        restored_chunk = restored.chunks[0]
        self.assertEqual(source_chunk.index_value, restored_chunk.index_value)
        self.assertEqual(source_chunk.columns_value,
                         restored_chunk.columns_value)
        self.assertFalse(restored_chunk.op.gpu)
        pd.testing.assert_series_equal(source_chunk.dtypes,
                                       restored_chunk.dtypes)

        # converting something already on CPU returns the same object
        self.assertIs(restored, to_cpu(restored))
Exemple #6
0
    def testPermutation(self):
        """Check ``permutation`` for an int, a 1-d tensor, a 2-d tensor and bad input.

        For tensor inputs the test verifies chunk counts, the unknown (NaN)
        length of the permuted axis, the wiring back to the input chunks and
        that seeds are shared between chunks of the same stage.
        """
        # integer argument: a single chunk
        x = permutation(10)

        self.assertEqual(x.shape, (10, ))
        self.assertIsInstance(x.op, TensorPermutation)

        x = x.tiles()

        self.assertEqual(len(x.chunks), 1)
        self.assertIsInstance(x.chunks[0].op, TensorPermutation)

        # 1-d tensor argument split into three chunks
        arr = from_ndarray([1, 4, 9, 12, 15], chunk_size=2)
        x = permutation(arr)

        self.assertEqual(x.shape, (5, ))
        self.assertIsInstance(x.op, TensorPermutation)

        x = x.tiles()
        arr = get_tiled(arr)

        self.assertEqual(len(x.chunks), 3)
        self.assertTrue(np.isnan(x.chunks[0].shape[0]))
        # three levels of inputs lead back to the original data chunk
        self.assertIs(x.chunks[0].inputs[0].inputs[0].inputs[0],
                      arr.chunks[0].data)

        # 2-d tensor argument: only the first axis has unknown length
        arr = rand(3, 3, chunk_size=2)
        x = permutation(arr)

        self.assertEqual(x.shape, (3, 3))
        self.assertIsInstance(x.op, TensorPermutation)

        x = x.tiles()
        arr = get_tiled(arr)

        self.assertEqual(len(x.chunks), 4)
        self.assertTrue(np.isnan(x.chunks[0].shape[0]))
        self.assertEqual(x.chunks[0].shape[1], 2)
        self.assertIs(x.cix[0, 0].inputs[0].inputs[0].inputs[0],
                      arr.cix[0, 0].data)
        self.assertIs(x.cix[0, 0].inputs[0].inputs[1].inputs[0],
                      arr.cix[1, 0].data)
        self.assertEqual(x.cix[0, 0].op.seed, x.cix[0, 1].op.seed)
        self.assertEqual(x.cix[0, 0].inputs[0].inputs[0].inputs[0].op.seed,
                         x.cix[1, 0].inputs[0].inputs[0].inputs[0].op.seed)

        # The call itself must raise; wrapping it in a second assertRaises
        # (as the code used to) added nothing and obscured the intent.
        with self.assertRaises(np.AxisError):
            permutation('abc')
Exemple #7
0
    def testReExecuteSame(self):
        """Re-run already executed tileables and check that results stay consistent.

        Covers a multi-chunk tensor, a single-chunk tensor, a tampered cached
        chunk, two tensors built from the same expression, and a copied
        DataFrame.
        """
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.to_numpy()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.to_numpy()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        # (overwrites the stored chunk; the next to_numpy() must return this value)
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2

        result3 = arr4.to_numpy()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.to_numpy()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test copy, make sure it will not let the execution cache missed
        df = md.DataFrame(mt.ones((10, 3), chunk_size=5))
        # one-element list so the closure below sees the flag being flipped later
        executed = [False]

        def add_one(x):
            # raises if invoked again after the first execution has finished
            if executed[0]:  # pragma: no cover
                raise ValueError('executed before')
            return x + 1

        df2 = df.apply(add_one)
        pd.testing.assert_frame_equal(df2.to_pandas(), pd.DataFrame(np.ones((10, 3)) + 1))

        executed[0] = True

        # computing on a copy must not call add_one again
        df3 = df2.copy()
        df4 = df3 * 2
        pd.testing.assert_frame_equal(df4.to_pandas(), pd.DataFrame(np.ones((10, 3)) * 4))
Exemple #8
0
    def testIndicesIndexingTiles(self):
        """Integer indexing picks one source chunk and uses a chunk-local offset."""
        source = ones((10, 20, 30), chunk_size=(2, 20, 30))

        # (global row, index of the source chunk holding it, row offset inside that chunk)
        cases = (
            (3, (1, 0, 0), 1),
            (4, (2, 0, 0), 0),
        )
        for row, chunk_index, local_row in cases:
            picked = source[row].tiles()
            source = get_tiled(source)

            self.assertEqual(len(picked.chunks), 1)
            only_chunk = picked.chunks[0]
            self.assertIs(only_chunk.inputs[0], source.cix[chunk_index].data)
            self.assertEqual(only_chunk.op.indexes[0], local_row)
Exemple #9
0
    def testToGPU(self):
        """``to_gpu`` keeps DataFrame and Series metadata and flags the op as GPU."""
        # --- DataFrame ---
        values = np.random.rand(10, 10)
        row_labels = np.random.randint(-100, 100, size=(10, ))
        column_labels = [np.random.bytes(10) for _ in range(10)]
        raw = pd.DataFrame(values, index=row_labels, columns=column_labels)

        frame = from_pandas_df(raw)
        gpu_frame = to_gpu(frame)

        self.assertEqual(frame.index_value, gpu_frame.index_value)
        self.assertEqual(frame.columns_value, gpu_frame.columns_value)
        self.assertTrue(gpu_frame.op.gpu)
        pd.testing.assert_series_equal(frame.dtypes, gpu_frame.dtypes)

        gpu_frame = gpu_frame.tiles()
        frame = get_tiled(frame)

        self.assertEqual(frame.nsplits, gpu_frame.nsplits)
        frame_chunk = frame.chunks[0]
        gpu_chunk = gpu_frame.chunks[0]
        self.assertEqual(frame_chunk.index_value, gpu_chunk.index_value)
        self.assertEqual(frame_chunk.columns_value,
                         gpu_chunk.columns_value)
        self.assertTrue(gpu_chunk.op.gpu)
        pd.testing.assert_series_equal(frame_chunk.dtypes,
                                       gpu_chunk.dtypes)

        # converting an object that is already on GPU returns it unchanged
        self.assertIs(gpu_frame, to_gpu(gpu_frame))

        # --- Series (first column of the frame above) ---
        first_column = raw.iloc[:, 0]
        series = from_pandas_series(first_column)
        gpu_series = to_gpu(series)

        self.assertEqual(series.index_value, gpu_series.index_value)
        self.assertTrue(gpu_series.op.gpu)

        gpu_series = gpu_series.tiles()
        series = get_tiled(series)

        self.assertEqual(series.nsplits, gpu_series.nsplits)
        self.assertEqual(series.chunks[0].index_value,
                         gpu_series.chunks[0].index_value)
        self.assertTrue(gpu_series.chunks[0].op.gpu)

        self.assertIs(gpu_series, to_gpu(gpu_series))
Exemple #10
0
    def testSendTargets(self):
        """Execute a small graph on a worker pool and send its result to a target.

        The graph adds a ones tensor to ``mock_data``; once the finish callback
        fires, the value stored under the result chunk key is compared with
        ``mock_data + 1``.
        """
        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address,
                               distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)
            result_tensor = get_tiled(result_tensor)
            result_key = result_tensor.chunks[0].key

            # mock sender is created with the expected result payload
            pool.create_actor(MockSenderActor, [mock_data + np.ones((4, ))],
                              'out',
                              uid='w:mock_sender')
            with self.run_actor_test(pool) as test_actor:

                def _validate(*_):
                    # read the computed chunk back from the shared store
                    data = test_actor.shared_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())

                execution_ref.execute_graph(
                    session_id,
                    graph_key,
                    serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]),
                    None,
                    _tell=True)
                # ask for the result chunk to be sent to this pool's own address
                execution_ref.send_data_to_workers(
                    session_id,
                    graph_key, {result_key: (pool_address, )},
                    _tell=True)

                # validate on completion; any exception becomes a failed test result
                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Exemple #11
0
    def testMultiOutputsOp(self):
        """Use one output of a multi-output op (QR) and check what the executor keeps."""
        session = new_session()

        random_state = np.random.RandomState(0)
        data = random_state.rand(20, 5)
        source = mt.tensor(data, chunk_size=5)
        q_factor = mt.linalg.qr(source)[0]
        abs_q = mt.abs(q_factor)

        computed = session.run(abs_q)
        reference = np.abs(np.linalg.qr(data)[0])
        np.testing.assert_almost_equal(computed, reference)
        # only the chunks of the requested tensor are expected to be stored
        self.assertEqual(len(session._sess.executor.chunk_result),
                         len(get_tiled(abs_q).chunks))
Exemple #12
0
    def testReadZarrExecution(self):
        """Read the same zarr dataset through every accepted ``fromzarr`` input form."""
        test_array = np.random.RandomState(0).rand(20, 10)
        group_name = 'test_group'
        dataset_name = 'test_dataset'

        def check(source, **kwargs):
            # load, execute, compare with the raw data and require several chunks
            loaded = fromzarr(source, **kwargs)
            result = self.executor.execute_tensor(loaded, concat=True)[0]
            np.testing.assert_array_equal(result, test_array)
            self.assertGreater(len(get_tiled(loaded).chunks), 1)

        # unsupported input type
        with self.assertRaises(TypeError):
            fromzarr(object())

        with tempfile.TemporaryDirectory() as d:
            path = os.path.join(d, f'test_read_{int(time.time())}.zarr')

            group = zarr.group(path)

            # 1. the zarr array object returned when the dataset is created
            check(group.array(group_name + '/' + dataset_name,
                              test_array,
                              chunks=(7, 4)))

            # 2. a zarr array object reopened from disk
            check(zarr.open_array(f'{path}/{group_name}/{dataset_name}'))

            # 3. store path plus explicit group and dataset names
            check(path, group=group_name, dataset=dataset_name)

            # 4. full path to the dataset
            check(path + '/' + group_name + '/' + dataset_name)
    def testFancyIndexingNumpyExecution(self):
        """Execute fancy (list/ndarray) indexing and compare with NumPy indexing."""
        # test fancy index of type numpy ndarray
        raw = np.random.random((11, 8, 12, 14))
        arr = tensor(raw, chunk_size=(2, 5, 7, 8))

        def check(key):
            # apply the same key to the tensor and to the raw array
            indexed = arr[key]
            res = self.executor.execute_tensor(indexed, concat=True)[0]
            np.testing.assert_array_equal(res, raw[key])
            return indexed

        # np.s_ only builds the key object that the subscript syntax would build
        check([9, 10, 3, 1, 8, 10])
        check(np.s_[:2, ..., np.random.permutation(8)])
        check(np.s_[..., [1, 3, 9, 10], :5])
        check(np.s_[[8, 10, 3, 1, 9, 10], :, [1, 3, 9, 10, 2, 7]])

        ordered = check(np.s_[[1, 3, 5, 7, 9, 10], :, [1, 9, 9, 10, 2, 7]])
        # fancy index is ordered, no concat required
        self.assertGreater(len(get_tiled(ordered).nsplits[0]), 1)

        # multi-dimensional index lists
        check(np.s_[[[8, 10, 3], [1, 9, 10]], :, [[1, 3, 9], [10, 2, 7]]])
        # integer index mixed with fancy indexes of different ranks
        check(np.s_[0, [[1, 3], [3, 7], [7, 7]], :, [1, 9]])
Exemple #14
0
    def testMergeOneChunk(self):
        """Merge tiling when one or both sides consist of a single chunk."""
        df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
                            'value': [1, 2, 3, 5]})
        df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
                            'value': [5, 6, 7, 8]})

        # (left kwargs, right kwargs, expected chunk shape,
        #  (left chunk, right chunk) feeding each output chunk)
        cases = [
            # all have one chunk
            ({}, {}, (1, 1), [(0, 0)]),
            # left has one chunk
            ({}, {'chunk_size': 2}, (2, 1), [(0, 0), (0, 1)]),
            # right has one chunk
            ({'chunk_size': 2}, {}, (2, 1), [(0, 0), (1, 0)]),
        ]
        for left_kwargs, right_kwargs, chunk_shape, feeds in cases:
            mdf1 = from_pandas(df1, **left_kwargs)
            mdf2 = from_pandas(df2, **right_kwargs)
            merged = mdf1.merge(mdf2, left_on='lkey', right_on='rkey')
            tiled = merged.tiles()

            self.assertEqual(tiled.chunk_shape, chunk_shape)
            for position, (left_pos, right_pos) in enumerate(feeds):
                out_chunk = tiled.chunks[position]
                self.assertEqual(out_chunk.inputs[0].key,
                                 get_tiled(mdf1).chunks[left_pos].key)
                self.assertEqual(out_chunk.inputs[1].key,
                                 get_tiled(mdf2).chunks[right_pos].key)
Exemple #15
0
    def testUnravelIndex(self):
        """``unravel_index`` yields one tensor per dimension, tiled like its input."""
        flat_indices = tensor([22, 41, 37], chunk_size=1)
        coords = unravel_index(flat_indices, (7, 6))

        self.assertEqual(len(coords), 2)

        # tile every output, then look the tiled versions up
        for coord in coords:
            coord.tiles()
        coords = [get_tiled(coord) for coord in coords]

        for coord in coords:
            self.assertEqual(len(coord.chunks), 3)

        # 'B' is rejected as ``order``
        with self.assertRaises(TypeError):
            unravel_index([22, 41, 37], (7, 6), order='B')
Exemple #16
0
    def testBetaInc(self):
        """Check ``betainc`` metadata and tiling, with and without ``out``.

        Both variants must keep the input shape, match SciPy's result dtype and
        tile into ``TensorBetaInc`` chunks aligned with their first input.
        """
        raw1 = np.random.rand(4, 3, 2)
        raw2 = np.random.rand(4, 3, 2)
        raw3 = np.random.rand(4, 3, 2)
        a = tensor(raw1, chunk_size=3)
        b = tensor(raw2, chunk_size=3)
        c = tensor(raw3, chunk_size=3)

        r = betainc(a, b, c)
        expect = scipy_betainc(raw1, raw2, raw3)

        self.assertEqual(r.shape, raw1.shape)
        self.assertEqual(r.dtype, expect.dtype)

        r = r.tiles()
        tiled_a = get_tiled(a)

        self.assertEqual(r.nsplits, tiled_a.nsplits)
        for chunk in r.chunks:
            self.assertIsInstance(chunk.op, TensorBetaInc)
            self.assertEqual(chunk.index, chunk.inputs[0].index)
            self.assertEqual(chunk.shape, chunk.inputs[0].shape)

        # same operation, but writing the result into ``a``
        betainc(a, b, c, out=a)
        expect = scipy_betainc(raw1, raw2, raw3)

        self.assertEqual(a.shape, raw1.shape)
        self.assertEqual(a.dtype, expect.dtype)

        tiled_a = a.tiles()
        b = get_tiled(b)

        self.assertEqual(tiled_a.nsplits, b.nsplits)
        # Verify the chunks of the ``out`` tensor. Iterating ``r.chunks`` here
        # would only repeat the first loop and leave the ``out=`` path untested.
        for chunk in tiled_a.chunks:
            self.assertIsInstance(chunk.op, TensorBetaInc)
            self.assertEqual(chunk.index, chunk.inputs[0].index)
            self.assertEqual(chunk.shape, chunk.inputs[0].shape)
Exemple #17
0
    def testIterativeTiling(self):
        """Run graphs that slice or post-process an in-place sorted tensor.

        Results are compared with the equivalent NumPy computation; two of the
        scenarios also check how many chunk results the executor holds.
        """
        sess = new_session()

        rs = np.random.RandomState(0)
        raw = rs.rand(100)
        a = mt.tensor(raw, chunk_size=10)
        a.sort()
        c = a[:5]

        ret = sess.run(c)
        np.testing.assert_array_equal(ret, np.sort(raw)[:5])

        # exactly one chunk result is held after the run; clear it for the next scenario
        executor = sess._sess.executor
        self.assertEqual(len(executor.chunk_result), 1)
        executor.chunk_result.clear()

        # sort -> slice -> concatenate -> sort -> slice
        raw1 = rs.rand(20)
        raw2 = rs.rand(20)
        a = mt.tensor(raw1, chunk_size=10)
        a.sort()
        b = mt.tensor(raw2, chunk_size=15) + 1
        c = mt.concatenate([a[:10], b])
        c.sort()
        d = c[:5]

        ret = sess.run(d)
        expected = np.sort(np.concatenate([np.sort(raw1)[:10], raw2 + 1]))[:5]
        np.testing.assert_array_equal(ret, expected)
        self.assertEqual(len(executor.chunk_result), len(get_tiled(d).chunks))

        # run a tensor together with a slice of it; only the slice is checked
        raw = rs.rand(100)
        a = mt.tensor(raw, chunk_size=10)
        a.sort()
        b = a + 1
        c = b[:5]

        ret = sess.run([b, c])
        expected = np.sort(raw + 1)[:5]
        np.testing.assert_array_equal(ret[1], expected)

        # histogram with the 'stone' bin estimator on a sorted tensor
        raw = rs.randint(100, size=(100,))
        a = mt.tensor(raw, chunk_size=23)
        a.sort()
        b = mt.histogram(a, bins='stone')

        res = sess.run(b)
        expected = np.histogram(np.sort(raw), bins='stone')
        np.testing.assert_almost_equal(res[0], expected[0])
        np.testing.assert_almost_equal(res[1], expected[1])
Exemple #18
0
    def testMixedIndexingTiles(self):
        """Tile an index mixing slice, integer, ellipsis, new axis and boolean mask."""
        source = ones((100, 200, 300, 400), chunk_size=24)

        mask = ones(400, chunk_size=24) < 2
        mixed = source[10:90:3, 5, ..., None, mask].tiles()
        mask = get_tiled(mask)

        # every axis but the masked one has a known length
        self.assertEqual(mixed.shape[:-1], (27, 300, 1))
        self.assertTrue(np.isnan(mixed.shape[-1]))
        self.assertEqual(mixed.chunk_shape, (4, 13, 1, 17))

        # the first chunk holds the per-chunk form of each index component
        expected_indexes = [
            slice(10, 24, 3),
            5,
            slice(None),
            None,
            mask.cix[0, ].data,
        ]
        self.assertEqual(mixed.chunks[0].op.indexes, expected_indexes)
Exemple #19
0
    def testGraphDeviceAssigner(self):
        """Device assignment keeps chunks of one row together and separates rows."""
        import mars.tensor as mt

        source = mt.random.rand(10, 10, chunk_size=5, gpu=True)
        row_sums = source.sum(axis=1)
        graph = row_sums.build_graph(tiled=True, fuse_enabled=False)

        # start the assignment from the ops of the graph's independent nodes
        initial_ops = [node.op for node in graph.iter_indep()]
        GraphDeviceAssigner(graph, initial_ops, devices=[0, 1]).assign()

        source = get_tiled(source)
        # chunks in the same row share a device ...
        for row in (0, 1):
            self.assertEqual(source.cix[row, 0].device, source.cix[row, 1].device)
        # ... and the two rows end up on different devices
        self.assertNotEqual(source.cix[0, 0].device, source.cix[1, 0].device)
Exemple #20
0
    def testFetch(self):
        """In eager mode, a new tensor picks up a tampered cached chunk result."""
        from mars.session import Session

        with option_context({'eager_mode': True}):
            arr1 = mt.ones((10, 5), chunk_size=4)
            np.testing.assert_array_equal(arr1, np.ones((10, 5)))

            # overwrite the stored result of arr1's first chunk with twos
            sess = Session.default_or_local()
            executor = sess._sess._executor
            executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
                (4, 4)) * 2

            arr2 = mt.ones((10, 5), chunk_size=4) - 1
            result = arr2.fetch()
            # top-left block is 2 - 1 = 1, so the tampered chunk was used
            # (presumably the new ones tensor has the same chunk keys as arr1 - confirm)
            np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
            # an untouched block is 1 - 1 = 0
            np.testing.assert_array_equal(result[8:, :4], np.zeros((2, 4)))
Exemple #21
0
    def testExecuteBothExecutedAndNot(self):
        """Run an unexecuted tensor together with an already executed one."""
        data = np.random.random((5, 9))

        # arr1 is not executed before the joint run; arr2 is
        arr1 = mt.tensor(data, chunk_size=4) * 2
        arr2 = mt.tensor(data) + 1

        np.testing.assert_array_equal(arr2.to_numpy(), data + 1)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr2).chunks[0].key] = data + 2

        results = sess.run(arr1, arr2)
        # arr1 is computed normally; arr2 must return the tampered stored value
        np.testing.assert_array_equal(results[0], data * 2)
        np.testing.assert_array_equal(results[1], data + 2)
Exemple #22
0
    def testRechunk(self):
        """Rechunk a (12, 9) tensor from chunk size 4 to 3 and inspect chunk inputs."""
        source = ones((12, 9), chunk_size=4)
        rechunked = source.rechunk(3).tiles()

        self.assertEqual(len(rechunked.chunks), 12)
        self.assertEqual(rechunked.chunks[0].inputs[0], get_tiled(source).chunks[0].data)

        # (chunk position, expected slices taken by each of its two inputs)
        expectations = [
            (1, [[slice(None, 3, None), slice(3, None, None)],
                 [slice(None, 3, None), slice(None, 2, None)]]),
            (-1, [[slice(1, None, None), slice(2, None, None)],
                  [slice(1, None, None), slice(None, None, None)]]),
        ]
        for position, input_slices in expectations:
            chunk = rechunked.chunks[position]
            self.assertEqual(len(chunk.inputs), 2)
            for chunk_input, slices in zip(chunk.inputs, input_slices):
                self.assertEqual(chunk_input.op.slices, slices)
Exemple #23
0
Fichier : core.py Projet : h8f/mars
    def _check_nsplits(self, tileable):
        """Assert that the tiled form of ``tileable`` has self-consistent ``nsplits``.

        Checks, in order: the number of splits per axis against ``chunk_shape``,
        the per-axis sums against the tileable shape, that every chunk can be
        found again through its own index, and that each chunk shape equals the
        corresponding entry of ``nsplits`` (NaN on both sides counts as equal).

        Raises:
            AssertionError: on the first inconsistency found.
        """
        from mars.core import get_tiled

        tiled = get_tiled(tileable)
        nsplits = tiled.nsplits
        # nothing to verify when there are no splits and a single chunk
        if nsplits == () and len(tiled.chunks) == 1:
            return

        splits_per_axis = tuple(len(axis_splits) for axis_splits in nsplits)
        if splits_per_axis != tiled.chunk_shape:
            message = (
                'Operand %r: shape of nsplits %r not consistent with chunk shape %r'
                % (tiled.op, splits_per_axis, tiled.chunk_shape))
            raise AssertionError(message) from None

        summed_shape = tuple(np.sum(axis_splits) for axis_splits in nsplits)
        try:
            self.assert_shape_consistent(summed_shape, tiled.shape)
        except AssertionError:
            message = (
                'Operand %r: shape computed from nsplits %r -> %r not consistent with real shape %r'
                % (tiled.op, nsplits, summed_shape, tiled.shape))
            raise AssertionError(message) from None

        # every chunk must be reachable through its own index
        for chunk in tiled.chunks:
            try:
                located = tiled.cix[chunk.index]
            except ValueError as ex:
                message = (
                    'Operand %r: Malformed index %r, nsplits is %r. Raw error is %r'
                    % (chunk.op, chunk.index, nsplits, ex))
                raise AssertionError(message) from None

            if located is not chunk:
                raise AssertionError(
                    'Operand %r: Cannot spot chunk via index %r, nsplits is %r'
                    % (chunk.op, chunk.index, nsplits))

        for position, split_shape in enumerate(itertools.product(*nsplits)):
            chunk = tiled.chunks[position]
            # prefer a recorded raw shape when there is one for this chunk key
            chunk_shape = self._raw_chunk_shapes.get(chunk.key) or chunk.shape
            rank_differs = len(split_shape) != len(chunk_shape)
            if rank_differs or any(
                    (not (np.isnan(s1) and np.isnan(s2))) and s1 != s2
                    for s1, s2 in zip(split_shape, chunk_shape)):
                raise AssertionError(
                    'Operand %r: Shape in nsplits %r does not meet shape in chunk %r'
                    % (chunk.op, split_shape, chunk_shape))
Exemple #24
0
    def testElf(self):
        """``erf`` keeps shape and dtype and tiles chunk-for-chunk with its input."""
        data = np.random.rand(10, 8, 5)
        source = tensor(data, chunk_size=3)

        result = erf(source)
        reference = scipy_erf(data)

        self.assertEqual(result.shape, data.shape)
        self.assertEqual(result.dtype, reference.dtype)

        result = result.tiles()
        source = get_tiled(source)

        self.assertEqual(result.nsplits, source.nsplits)
        for chunk in result.chunks:
            self.assertIsInstance(chunk.op, TensorErf)
            # each output chunk mirrors the position and shape of its input chunk
            first_input = chunk.inputs[0]
            self.assertEqual(chunk.index, first_input.index)
            self.assertEqual(chunk.shape, first_input.shape)
Exemple #25
0
    def testSliceTiles(self):
        """Slicing across chunk borders maps to per-chunk slices of the right inputs."""
        source = ones((100, 200, 300), chunk_size=30)
        sliced = source[10:40, 199:, -30:303].tiles()
        source = get_tiled(source)

        self.assertEqual(sliced.chunk_shape, (2, 1, 1))

        # (output position, source chunk index, chunk-local slice on the first axis)
        expectations = (
            (0, (0, -1, -1), slice(10, 30, 1)),
            (1, (1, -1, -1), slice(0, 10, 1)),
        )
        for position, source_index, first_axis in expectations:
            chunk = sliced.chunks[position]
            self.assertEqual(chunk.inputs[0], source.cix[source_index].data)
            self.assertEqual(
                chunk.op.indexes,
                [first_axis, slice(19, 20, 1), slice(None)])
            self.assertEqual(chunk.index, (position, 0, 0))
Exemple #26
0
    def testTensorExecuteNotFetch(self):
        """``execute()`` returns the tensor itself and later reads use stored chunks."""
        data = np.random.random((5, 9))
        sess = Session.default_or_local()

        arr1 = mt.tensor(data, chunk_size=2) * 2

        # fetching before anything was executed is an error
        with self.assertRaises(ValueError):
            sess.fetch(arr1)

        self.assertIs(arr1.execute(), arr1)

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = data[:2, :2] * 3

        # expected: the normal result with the first chunk replaced by the tampered value
        expected = data * 2
        expected[:2, :2] = data[:2, :2] * 3

        np.testing.assert_array_equal(arr1.to_numpy(), expected)
Exemple #27
0
    def run_test(self, worker, calc_device=None):
        """Submit a random matrix product to the execution actor on ``worker``.

        ``self._replied`` is set to True once the execution promise resolves.
        """
        import mars.tensor as mt
        from mars.worker import ExecutionActor

        session_id = str(uuid.uuid4())

        # operands are created on GPU only for the 'cuda' calc device
        gpu = calc_device in ('cuda', )
        left = mt.random.rand(100, 50, chunk_size=30, gpu=gpu)
        right = mt.random.rand(50, 200, chunk_size=30, gpu=gpu)
        product = left.dot(right)

        graph = product.build_graph(tiled=True)
        product = get_tiled(product)

        executor_ref = self.promise_ref(ExecutionActor.default_uid(),
                                        address=worker)
        io_meta = {'chunks': [chunk.key for chunk in product.chunks]}
        graph_key = str(id(graph))

        def _mark_replied(*_):
            self._replied = True

        promise = executor_ref.execute_graph(
            session_id, graph_key, serialize_graph(graph),
            io_meta, None, calc_device=calc_device, _promise=True)
        promise.then(_mark_replied)
Exemple #28
0
    def testDataFrameExecuteNotFetch(self):
        """``execute()`` returns the DataFrame itself and later reads use stored chunks.

        Also checks that filtered results can be fetched in batches and that a
        tampered chunk result shows up in ``to_pandas``, with and without
        batched fetching.
        """
        data1 = pd.DataFrame(np.random.random((5, 4)), columns=list('abcd'))
        sess = Session.default_or_local()

        df1 = md.DataFrame(data1, chunk_size=2)

        # fetching before anything was executed is an error
        with self.assertRaises(ValueError):
            sess.fetch(df1)

        self.assertIs(df1.execute(), df1)
        # values lie in [0, 1), so the filter ``a > 1`` selects no rows
        self.assertEqual(len(df1[df1['a'] > 1].to_pandas(fetch_kwargs={'batch_size': 2})), 0)
        self.assertEqual(len(df1[df1['a'] > 1]['a'].to_pandas(fetch_kwargs={'batch_size': 2})), 0)

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(df1).chunks[0].key] = data1.iloc[:2, :2] * 3

        # Build the expectation on a copy: writing through an alias of ``data1``
        # would silently change the very frame ``df1`` was created from.
        expected = data1.copy()
        expected.iloc[:2, :2] = data1.iloc[:2, :2] * 3

        pd.testing.assert_frame_equal(df1.to_pandas(), expected)
        pd.testing.assert_frame_equal(df1.to_pandas(fetch_kwargs={'batch_size': 2}), expected)
Exemple #29
0
    def run_simple_calc(self, session_id):
        """Submit ``ones((4,)) + 1`` to the execution actor and record the outcome.

        Stores the session id, result chunk key and graph key on ``self``; the
        finish callback appends ``(True,)`` or ``(False, exc)`` to
        ``self._results``.
        """
        self._session_id = session_id

        import mars.tensor as mt
        arr = mt.ones((4, ), chunk_size=4) + 1
        graph = arr.build_graph(fuse_enabled=False, tiled=True)

        arr = get_tiled(arr)
        self._array_key = arr.chunks[0].key

        graph_key = str(uuid.uuid4())
        self._graph_key = graph_key

        execution_ref = self.promise_ref(ExecutionActor.default_uid())
        io_meta = {'chunks': [arr.chunks[0].key]}
        execution_ref.execute_graph(session_id,
                                    graph_key,
                                    serialize_graph(graph),
                                    io_meta,
                                    None,
                                    _tell=True)

        def _on_success(*_):
            return self._results.append((True,))

        def _on_failure(*exc):
            return self._results.append((False, exc))

        execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
            .then(_on_success) \
            .catch(_on_failure)
Exemple #30
0
    def testEstimateGraphFinishTime(self):
        """Finish-time estimation publishes min/max estimates to the status actor."""
        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)

            status_ref = pool.actor_ref(StatusActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((10, 8), chunk_size=10)
            graph = arr.build_graph(fuse_enabled=False, tiled=True)

            arr = get_tiled(arr)

            graph_key = str(uuid.uuid4())

            # feed one more sample than options.optimize.min_stats_count for each
            # speed metric (presumably the minimum needed before estimating - confirm)
            for _ in range(options.optimize.min_stats_count + 1):
                status_ref.update_mean_stats(
                    'calc_speed.' + type(arr.chunks[0].op).__name__, 10)
                status_ref.update_mean_stats('disk_read_speed', 10)
                status_ref.update_mean_stats('disk_write_speed', 10)
                status_ref.update_mean_stats('net_transfer_speed', 10)

            execution_ref.execute_graph(session_id, graph_key,
                                        serialize_graph(graph),
                                        dict(chunks=[arr.chunks[0].key]), None)
            execution_ref.estimate_graph_finish_time(session_id, graph_key)

            # both estimates must be present in the status actor afterwards
            stats_dict = status_ref.get_stats(
                ['min_est_finish_time', 'max_est_finish_time'])
            self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
            self.assertIsNotNone(stats_dict.get('max_est_finish_time'))