コード例 #1
0
ファイル: test_chunkmeta.py プロジェクト: ai-driven/mars
    def testChunkMetaCache(self):
        """Exercise eviction in a ``ChunkMetaCache`` constructed with limit 9.

        Ten entries are inserted, after which 'c0' must be gone. Then, on
        deep copies of that cache, 'c1' is touched via ``get``, via item
        read and via item write before 'c10' is added; in every case the
        test expects 'c2' rather than 'c1' to be the entry that disappears.
        """
        origin = ChunkMetaCache(9)

        for n in range(10):
            origin['c%d' % n] = WorkerMeta(n, (n, ), ('w0', ))
        kept = ['c%d' % n for n in range(1, 10)]
        self.assertNotIn('c0', origin)
        self.assertTrue(all(k in origin for k in kept))
        self.assertListEqual(sorted(origin.get_worker_chunk_keys('w0')), kept)

        def _expect_c2_dropped(clone):
            # 'c1' was touched before 'c10' arrived, so 'c2' should be the victim
            self.assertNotIn('c2', clone)
            self.assertIn('c1', clone)
            self.assertTrue(all('c%d' % n in clone for n in range(3, 11)))

        # scenario 1: touch 'c1' through .get()
        clone = copy.deepcopy(origin)
        clone.get('c1')
        clone['c10'] = WorkerMeta(10, (10, ), ('w0', ))
        self.assertIsNone(clone.get('c0'))
        _expect_c2_dropped(clone)

        # scenario 2: touch 'c1' through item read
        clone = copy.deepcopy(origin)
        _ = clone['c1']  # noqa: F841
        clone['c10'] = WorkerMeta(10, (10, ), ('w0', ))
        _expect_c2_dropped(clone)

        # scenario 3: touch 'c1' through item write
        clone = copy.deepcopy(origin)
        clone['c1'] = WorkerMeta(1, (1, ), ('w0', ))
        clone['c10'] = WorkerMeta(10, (10, ), ('w0', ))
        _expect_c2_dropped(clone)
コード例 #2
0
ファイル: test_chunkmeta.py プロジェクト: ai-driven/mars
    def testChunkMetaStore(self):
        """Check item access and the worker -> chunk-key index of ``ChunkMetaStore``.

        Covers set/get/delete of a single key, re-assigning a key to another
        worker, and ``remove_worker_keys`` both with and without a filter.
        """
        meta_store = ChunkMetaStore()

        # single key owned by 'w0'
        meta_store['c0'] = WorkerMeta(0, (0, ), ('w0', ))
        expected_c0 = WorkerMeta(0, (0, ), ('w0', ))
        self.assertIn('c0', meta_store)
        self.assertEqual(meta_store['c0'], expected_c0)
        self.assertEqual(meta_store.get('c0'), expected_c0)
        self.assertIsNone(meta_store.get('c1'))
        self.assertSetEqual(meta_store.get_worker_chunk_keys('w0'), {'c0'})

        # overwriting the key moves it from 'w0' to 'w1'
        meta_store['c0'] = WorkerMeta(0, (0, ), ('w1', ))
        self.assertEqual(meta_store.get_worker_chunk_keys('w0'), set())
        self.assertSetEqual(meta_store.get_worker_chunk_keys('w1'), {'c0'})

        del meta_store['c0']
        self.assertNotIn('c0', meta_store)

        # several keys spread over two workers
        layout = (
            ('c1', 1, ('w0', 'w1')),
            ('c2', 2, ('w1', )),
            ('c3', 3, ('w0', )),
            ('c4', 4, ('w0', )),
        )
        for chunk_key, num, owners in layout:
            meta_store[chunk_key] = WorkerMeta(num, (num, ), owners)

        def _last_char_below_4(chunk_key):
            return chunk_key[-1] < '4'

        # filtered removal: only keys whose last character sorts below '4'
        removed = meta_store.remove_worker_keys('w0', _last_char_below_4)
        self.assertListEqual(removed, ['c3'])
        self.assertEqual(meta_store.get('c1'), WorkerMeta(1, (1, ), ('w1', )))
        self.assertEqual(meta_store.get('c2'), WorkerMeta(2, (2, ), ('w1', )))
        self.assertSetEqual(meta_store.get_worker_chunk_keys('w0'), {'c4'})
        self.assertSetEqual(meta_store.get_worker_chunk_keys('w1'), {'c1', 'c2'})
        self.assertNotIn('c3', meta_store)
        self.assertIn('c4', meta_store)

        # unfiltered removal drops what is left on 'w0'
        removed = meta_store.remove_worker_keys('w0')
        self.assertListEqual(removed, ['c4'])
        self.assertNotIn('c4', meta_store)
        self.assertIsNone(meta_store.get_worker_chunk_keys('w0'))
        self.assertSetEqual(meta_store.get_worker_chunk_keys('w1'), {'c1', 'c2'})
コード例 #3
0
    def testFetchRemoteData(self):
        """Run a graph whose second input has been turned into a fetch operand.

        The same graph is submitted three times:

        1. with no chunk metas (``None``) -> ``DependencyMissing`` expected;
        2. with metas listing only '0.0.0.0:1234' as worker ->
           ``DependencyMissing`` expected;
        3. with metas that also list this pool's address (written with
           'localhost') -> execution finishes and the stored result must
           equal ``mock_data + ones``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            # NOTE(review): presumably the mock sender serves ``mock_data`` to the
            # receiving side -- confirm against MockSenderActor
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            # replace the operand of the ``arr_add`` chunk with a TensorFetch that
            # keeps the original operand key and outputs
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # attempt 1: no metas at all
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # attempt 2: metas name only '0.0.0.0:1234' as holder of the chunk
            metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234',))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # attempt 3: add this pool (spelled with 'localhost') as a second worker
            metas[modified_chunk.key] = WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    # the result chunk read from the shared store must be ones + mock_data
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
コード例 #4
0
    def testPrepareSpilled(self):
        """Run a graph whose fetched input exists only on the DISK storage device.

        First the graph is submitted without metas and ``DependencyMissing``
        is expected. Then the input chunk is put into process memory, copied
        to disk, deleted from process memory, and the graph is executed
        again; the stored result must equal ``mock_data + ones``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        # NOTE(review): this overwrites a global option and the temporary
        # directory is not removed within this method -- confirm that the test
        # class restores/cleans it elsewhere
        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(IORunnerActor)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            # replace the operand of the ``arr_add`` chunk with a TensorFetch that
            # keeps the original operand key and outputs
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # metas list both an unrelated address and this pool as holders
            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', pool_address))}

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                # put into process memory, then copy to disk ...
                self.waitp(
                    test_actor.storage_client.put_objects(
                            session_id, [modified_chunk.key], [mock_data], [DataStorageDevice.PROC_MEMORY])
                        .then(lambda *_: test_actor.storage_client.copy_to(
                            session_id, [modified_chunk.key], [DataStorageDevice.DISK]))
                )
                # ... and drop the in-memory copy so only the disk copy is left
                test_actor.storage_client.delete(session_id, [modified_chunk.key],
                                                 [DataStorageDevice.PROC_MEMORY])

                def _validate(_):
                    # the result chunk read from the shared store must be ones + mock_data
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
コード例 #5
0
    def testPrepareQuota(self, *_):
        """Check that execution does not finish while pinning data keys fails.

        ``SharedHolderActor.pin_data_keys`` is patched to raise
        ``PinDataKeyFailed`` as long as the local flag ``pinned`` is True. A
        background thread clears the flag after 0.5 seconds, and the test
        asserts that the graph finishes no earlier than 0.5 seconds after it
        was submitted.

        The extra positional arguments are ignored (presumably supplied by
        patch decorators on the method -- not visible here).
        """
        pinned = True

        orig_pin = SharedHolderActor.pin_data_keys

        def _mock_pin(self, session_id, chunk_keys, token):
            # fail while the flag is set, otherwise delegate to the real method
            from mars.errors import PinDataKeyFailed
            if pinned:
                raise PinDataKeyFailed
            return orig_pin(self, session_id, chunk_keys, token)

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
                create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            # NOTE(review): the returned reference is not used
            pool.actor_ref(WorkerClusterInfoActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            # replace the operand of the ``arr_add`` chunk with a TensorFetch that
            # keeps the original operand key and outputs
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                # the test result is the wall-clock time at which the graph finished
                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    # let pinning succeed after half a second
                    nonlocal pinned
                    time.sleep(0.5)
                    pinned = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
コード例 #6
0
    def testSimpleExecution(self):
        """Execute ``ones + ones`` with both inputs replaced by fetch operands.

        Every ``TensorOnes`` chunk in the graph gets a ``TensorFetch`` operand
        and a ``WorkerMeta`` pointing at this pool. The input data is put into
        shared memory beforehand, the graph is executed and the result is
        checked to be all twos. Afterwards a finish callback is registered
        for the already submitted graph key and the result is checked again.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.datasource import TensorOnes
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(compose=False, tiled=True)

            arr = get_tiled(arr)
            arr2 = get_tiled(arr2)

            metas = dict()
            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    # turn the data source into a fetch that keeps key and outputs
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype, _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)
                    # workers is a collection of addresses: pass a one-element
                    # tuple rather than the bare address string
                    metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape, (pool_address, ))

            with self.run_actor_test(pool) as test_actor:
                session_id = str(uuid.uuid4())

                # make the input chunk available in shared memory before executing
                storage_client = test_actor.storage_client
                self.waitp(
                    storage_client.put_objects(session_id, [arr.chunks[0].key], [np.ones((10, 8), dtype=np.int16)],
                                               [DataStorageDevice.SHARED_MEMORY]),
                )

                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    # ones + ones must yield an array of twos
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                graph_key = str(uuid.uuid4())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            # register a finish callback for the graph key submitted above and
            # validate the stored result once more
            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
コード例 #7
0
ファイル: test_chunkmeta.py プロジェクト: ai-driven/mars
    def testChunkMetaActors(self, *_):
        """Exercise ``ChunkMetaClient`` / ``ChunkMetaActor`` across two actor pools.

        Two pools each host a ``SchedulerClusterInfoActor`` and a
        ``ChunkMetaActor``. Through two clients and two direct actor
        references the test checks size, shape and worker metadata (single
        and batch variants), worker removal within a session, deletion, and
        batch setting of whole ``WorkerMeta`` objects.

        The extra positional arguments are ignored (presumably supplied by
        patch decorators on the method -- not visible here).
        """
        proc_count = 2
        endpoints = [
            '127.0.0.1:%d' % get_next_port() for _ in range(proc_count)
        ]
        keys = []

        def _mock_get_scheduler(key):
            # ``keys`` is looked up at call time, so this sees the list that is
            # assigned further down; keys alternate between the endpoints by
            # their position in that list.
            # NOTE(review): ``key[1]`` presumably is the chunk key of a
            # (session_id, chunk_key) pair -- confirm against ChunkMetaClient
            return endpoints[keys.index(key[1]) % len(endpoints)]

        ChunkMetaClient.get_scheduler.side_effect = _mock_get_scheduler

        session1 = str(uuid.uuid4())
        session2 = str(uuid.uuid4())
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=endpoints[0]) as pool1:
            cluster_info1 = pool1.create_actor(
                SchedulerClusterInfoActor,
                endpoints,
                uid=SchedulerClusterInfoActor.default_name())
            pool1.create_actor(ChunkMetaActor,
                               uid=ChunkMetaActor.default_name())

            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=endpoints[1]) as pool2:
                cluster_info2 = pool2.create_actor(
                    SchedulerClusterInfoActor,
                    endpoints,
                    uid=SchedulerClusterInfoActor.default_name())
                pool2.create_actor(ChunkMetaActor,
                                   uid=ChunkMetaActor.default_name())

                actor_client = new_client()
                client1 = ChunkMetaClient(
                    actor_client, actor_client.actor_ref(cluster_info1))
                client2 = ChunkMetaClient(
                    actor_client, actor_client.actor_ref(cluster_info2))

                # direct references to the meta actor on each endpoint
                loc_ref1 = actor_client.actor_ref(
                    ChunkMetaActor.default_name(), address=endpoints[0])
                loc_ref2 = actor_client.actor_ref(
                    ChunkMetaActor.default_name(), address=endpoints[1])

                # key1 and key4 are tuples, the others plain strings
                key1 = (str(uuid.uuid4()), str(uuid.uuid4()))
                key2 = str(uuid.uuid4())
                key3 = str(uuid.uuid4())
                key4 = (str(uuid.uuid4()), str(uuid.uuid4()))
                key5 = str(uuid.uuid4())
                key6 = str(uuid.uuid4())
                keys = [key1, key2, key3, key4, key5, key6]
                client1.set_chunk_size(session1, key1, 512)
                client2.set_chunk_size(session1, key2, 1024)
                client2.set_chunk_size(session2, key3, 1024)

                # sizes must be readable through either client
                self.assertEqual(client1.get_chunk_size(session1, key1), 512)
                self.assertEqual(client2.get_chunk_size(session1, key2), 1024)
                self.assertEqual(client1.get_chunk_size(session1, key2), 1024)
                self.assertEqual(client2.get_chunk_size(session1, key1), 512)

                self.assertListEqual(
                    client1.batch_get_chunk_size(session1, [key1, key2]),
                    [512, 1024])
                self.assertListEqual(
                    client2.batch_get_chunk_size(session1, [key1, key2]),
                    [512, 1024])

                client1.set_chunk_shape(session1, key1, (10, ))
                client2.set_chunk_shape(session1, key2, (10, ) * 2)
                client2.set_chunk_shape(session2, key3, (10, ) * 2)

                # shapes must be readable through either client
                self.assertEqual(client1.get_chunk_shape(session1, key1),
                                 (10, ))
                self.assertEqual(client2.get_chunk_shape(session1, key2),
                                 (10, ) * 2)
                self.assertEqual(client1.get_chunk_shape(session1, key2),
                                 (10, ) * 2)
                self.assertEqual(client2.get_chunk_shape(session1, key1),
                                 (10, ))

                self.assertListEqual(
                    client1.batch_get_chunk_shape(session1, [key1, key2]),
                    [(10, ), (10, ) * 2])
                self.assertListEqual(
                    client2.batch_get_chunk_shape(session1, [key1, key2]),
                    [(10, ), (10, ) * 2])

                # a third pool that hosts only cluster info (no ChunkMetaActor
                # is created in it) must still resolve shapes via its client
                mock_endpoint = '127.0.0.1:%d' % get_next_port()
                with create_actor_pool(n_process=1,
                                       backend='gevent',
                                       address=mock_endpoint) as pool3:
                    cluster_info3 = pool3.create_actor(
                        SchedulerClusterInfoActor,
                        endpoints,
                        uid=SchedulerClusterInfoActor.default_name())
                    client3 = ChunkMetaClient(
                        actor_client, actor_client.actor_ref(cluster_info3))
                    self.assertListEqual(
                        client3.batch_get_chunk_shape(session1, [key1, key2]),
                        [(10, ), (10, ) * 2])

                client1.add_worker(session1, key1, 'abc')
                client1.add_worker(session1, key1, 'def')
                client2.add_worker(session1, key2, 'ghi')

                client1.add_worker(session2, key3, 'ghi')

                self.assertEqual(sorted(client1.get_workers(session1, key1)),
                                 sorted(('abc', 'def')))
                self.assertEqual(sorted(client2.get_workers(session1, key2)),
                                 sorted(('ghi', )))

                batch_result = client1.batch_get_workers(
                    session1, [key1, key2])
                self.assertEqual(sorted(batch_result[0]), sorted(
                    ('abc', 'def')))
                self.assertEqual(sorted(batch_result[1]), sorted(('ghi', )))

                # removing worker 'ghi' in session2 must affect only key3 and
                # leave session1's key2 untouched
                affected = []
                for loc_ref in (loc_ref1, loc_ref2):
                    affected.extend(
                        loc_ref.remove_workers_in_session(session2, ['ghi']))
                self.assertEqual(affected, [key3])
                self.assertEqual(sorted(client1.get_workers(session1, key2)),
                                 sorted(('ghi', )))
                self.assertIsNone(client1.get_workers(session2, key3))

                # single delete: key1 disappears, batch getters yield None for it
                client1.delete_meta(session1, key1)
                self.assertIsNone(client1.get_workers(session1, key1))
                self.assertIsNone(
                    client1.batch_get_chunk_size(session1, [key1, key2])[0])
                self.assertIsNone(
                    client1.batch_get_workers(session1, [key1, key2])[0])

                # batch delete (key1 is already gone at this point)
                client2.batch_delete_meta(session1, [key1, key2])
                self.assertIsNone(client1.get_workers(session1, key2))
                self.assertIsNone(
                    client1.batch_get_chunk_size(session1, [key1, key2])[1])
                self.assertIsNone(
                    client1.batch_get_workers(session1, [key1, key2])[1])

                # set a whole meta object directly on the second actor
                meta4 = WorkerMeta(chunk_size=512,
                                   chunk_shape=(10, ) * 2,
                                   workers=(endpoints[0], ))
                loc_ref2.batch_set_chunk_meta(session1, [key4], [meta4])
                self.assertEqual(
                    loc_ref2.get_chunk_meta(session1, key4).chunk_size, 512)
                self.assertEqual(
                    loc_ref2.get_chunk_meta(session1, key4).chunk_shape,
                    (10, ) * 2)

                # batch set through the client: key5 is then read from the
                # first actor and key6 from the second
                meta5 = WorkerMeta(chunk_size=512,
                                   chunk_shape=(10, ) * 2,
                                   workers=(endpoints[0], ))
                meta6 = WorkerMeta(chunk_size=512,
                                   chunk_shape=(10, ) * 2,
                                   workers=(endpoints[0], ))
                client1.batch_set_chunk_meta(session1, [key5, key6],
                                             [meta5, meta6])
                self.assertEqual(
                    loc_ref1.get_chunk_meta(session1, key5).chunk_size, 512)
                self.assertEqual(
                    loc_ref2.get_chunk_meta(session1, key6).chunk_size, 512)
コード例 #8
0
ファイル: test_chunkmeta.py プロジェクト: ai-driven/mars
    def testChunkBroadcast(self, *_):
        """Check that chunk metadata with broadcast targets shows up on both actors.

        Broadcast destinations are registered for key1, key2 and key3; after
        setting metadata the test reads it back directly from the
        ``ChunkMetaActor`` on each endpoint, then checks that deletion and
        worker removal leave no metadata for the affected keys on either actor.

        The extra positional arguments are ignored (presumably supplied by
        patch decorators on the method -- not visible here).
        """
        proc_count = 2
        endpoints = [
            '127.0.0.1:%d' % get_next_port() for _ in range(proc_count)
        ]
        keys = []

        def _mock_get_scheduler(key):
            # ``keys`` is looked up at call time, so this sees the list that is
            # assigned further down; keys alternate between the endpoints by
            # their position in that list.
            # NOTE(review): ``key[1]`` presumably is the chunk key of a
            # (session_id, chunk_key) pair -- confirm against ChunkMetaClient
            return endpoints[keys.index(key[1]) % len(endpoints)]

        ChunkMetaClient.get_scheduler.side_effect = _mock_get_scheduler

        session_id = str(uuid.uuid4())
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=endpoints[0]) as pool1:
            cluster_info1 = pool1.create_actor(
                SchedulerClusterInfoActor,
                endpoints,
                uid=SchedulerClusterInfoActor.default_name())
            pool1.create_actor(ChunkMetaActor,
                               uid=ChunkMetaActor.default_name())

            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=endpoints[1]) as pool2:
                cluster_info2 = pool2.create_actor(
                    SchedulerClusterInfoActor,
                    endpoints,
                    uid=SchedulerClusterInfoActor.default_name())
                pool2.create_actor(ChunkMetaActor,
                                   uid=ChunkMetaActor.default_name())

                actor_client = new_client()
                client1 = ChunkMetaClient(
                    actor_client, actor_client.actor_ref(cluster_info1))
                client2 = ChunkMetaClient(
                    actor_client, actor_client.actor_ref(cluster_info2))
                # direct references to the meta actor on each endpoint
                local_ref1 = actor_client.actor_ref(
                    ChunkMetaActor.default_name(), address=endpoints[0])
                local_ref2 = actor_client.actor_ref(
                    ChunkMetaActor.default_name(), address=endpoints[1])

                key1 = str(uuid.uuid4())
                key2 = str(uuid.uuid4())
                key3 = str(uuid.uuid4())
                keys = [key1, key2, key3]

                # key1 is broadcast to endpoint 1, key2 to endpoint 0
                client1.set_chunk_broadcasts(session_id, key1, [endpoints[1]])
                client1.set_chunk_size(session_id, key1, 512)
                client1.set_chunk_shape(session_id, key1, (10, ) * 2)
                client1.add_worker(session_id, key1, 'abc')
                client2.set_chunk_broadcasts(session_id, key2, [endpoints[0]])
                client2.set_chunk_size(session_id, key2, 512)
                client1.set_chunk_shape(session_id, key2, (10, ) * 2)
                client2.add_worker(session_id, key2, 'def')
                # NOTE(review): presumably gives the broadcast time to arrive -- confirm
                pool2.sleep(0.1)

                # key1's metadata must be present on both actors
                self.assertEqual(
                    local_ref1.get_chunk_meta(session_id, key1).chunk_size,
                    512)
                self.assertEqual(
                    local_ref1.get_chunk_meta(session_id, key1).chunk_shape,
                    (10, ) * 2)
                self.assertEqual(
                    local_ref1.get_chunk_broadcasts(session_id, key1),
                    [endpoints[1]])
                self.assertEqual(
                    local_ref2.get_chunk_meta(session_id, key1).chunk_size,
                    512)
                self.assertEqual(
                    local_ref2.get_chunk_meta(session_id, key1).chunk_shape,
                    (10, ) * 2)
                self.assertEqual(
                    local_ref2.get_chunk_broadcasts(session_id, key2),
                    [endpoints[0]])

                # batch variant: register the broadcast for key3, set its meta
                # on actor 1 and read it back from actor 2
                client1.batch_set_chunk_broadcasts(session_id, [key3],
                                                   [[endpoints[1]]])
                meta3 = WorkerMeta(chunk_size=512,
                                   chunk_shape=(10, ) * 2,
                                   workers=(endpoints[0], ))
                local_ref1.batch_set_chunk_meta(session_id, [key3], [meta3])
                self.assertEqual(
                    local_ref2.get_chunk_meta(session_id, key3).chunk_size,
                    512)
                self.assertEqual(
                    local_ref2.get_chunk_meta(session_id, key3).chunk_shape,
                    (10, ) * 2)

                # deleting key1 must clear it on both actors
                client1.delete_meta(session_id, key1)
                pool2.sleep(0.1)

                self.assertIsNone(local_ref1.get_chunk_meta(session_id, key1))
                self.assertIsNone(local_ref2.get_chunk_meta(session_id, key1))
                self.assertIsNone(
                    local_ref1.get_chunk_broadcasts(session_id, key1))

                # 'def' is the only worker added for key2, so removing it must
                # leave no metadata for key2 on either actor
                local_ref1.remove_workers_in_session(session_id, ['def'])
                local_ref2.remove_workers_in_session(session_id, ['def'])
                pool2.sleep(0.1)

                self.assertIsNone(local_ref1.get_chunk_meta(session_id, key2))
                self.assertIsNone(local_ref2.get_chunk_meta(session_id, key2))
                self.assertIsNone(
                    local_ref2.get_chunk_broadcasts(session_id, key2))
コード例 #9
0
ファイル: test_analyzer.py プロジェクト: tangyiyong/mars
    def testAssignWithPreviousData(self):
        r"""
        Check initial operand assignment when input chunk locations are given.

        Proper initial allocation should divide the graph like

         0   1  |  2   3  |  4   5
          \ /   |   \ /   |   \ /
           6    |    7    |    8
        """
        dag, _ = self._build_chunk_dag(
            '0, 1, 2, 3, 4, 5, 6, 7, 8',
            '0 -> 6, 1 -> 6, 2 -> 7, 3 -> 7, 4 -> 8, 5 -> 8')

        def _assign(chunk_metas):
            # a fresh analyzer (and a fresh slot mapping) for every scenario
            analyzer = GraphAnalyzer(dag, dict(w1=24, w2=24, w3=24))
            initial_keys = analyzer.get_initial_operand_keys()
            return analyzer.calc_operand_assignments(
                initial_keys, input_chunk_metas=chunk_metas)

        def _check(result, expected_pairs):
            self.assertEqual(len(result), 6)
            for op_key, worker in expected_pairs:
                self.assertEqual(result[op_key], worker)

        # both scenarios are expected to end with the same placement
        expected = (
            ('0', 'w1'), ('1', 'w1'),
            ('2', 'w3'), ('3', 'w3'),
            ('4', 'w2'), ('5', 'w2'),
        )

        # assign with partial mismatch
        partial = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w2', )),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w3', ))},
        }

        # explanation of the expected result:
        # for '1', all data is on w1, hence it is assigned to w1
        # '0' is assigned to w1 according to connectivity
        # '2' and '3' are assigned to w3 according to connectivity
        # '4' is assigned to w2 because it has less data and the slots of w3 are used up
        _check(_assign(partial), expected)

        # assign with full mismatch
        full = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w1', 'w2')),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w2', ))},
            '5': {'c50': WorkerMeta(chunk_size=7, workers=('w2', ))},
        }
        _check(_assign(full), expected)
コード例 #10
0
ファイル: test_analyzer.py プロジェクト: yyaaa1/mars
    def testAssignWithPreviousData(self):
        # Builds three independent "two random inputs -> one tree-add" groups
        # and checks which worker each operand key is assigned to when the
        # locations of already-stored chunk data are supplied through
        # ``input_chunk_metas``.
        import numpy as np
        from mars.scheduler.chunkmeta import WorkerMeta
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()
        # Proper initial allocation should divide the graph like
        #
        #  U   U  |  U   U  |  U   U
        #   \ /   |   \ /   |   \ /
        #    U    |    U    |    U

        chunk_shape = (10, 10)

        # Input chunks are keyed '0'..'5'; consecutive keys form one pair.
        input_pairs = []
        for group in range(3):
            pair = []
            for offset in range(2):
                rand_op = TensorRandint(_key=str(group * 2 + offset),
                                        dtype=np.float32())
                pair.append(rand_op.new_chunk(None, shape=chunk_shape))
            input_pairs.append(tuple(pair))

        # One tree-add chunk per input pair.
        sums = []
        for _ in range(3):
            add_op = TensorTreeAdd(dtype=np.float32())
            sums.append(add_op.new_chunk(None, shape=chunk_shape))

        # Wire every pair into its sum and register nodes/edges in the graph.
        for pair, total in zip(input_pairs, sums):
            total.op._inputs = list(pair)

            graph.add_node(total)
            for source in pair:
                graph.add_node(source)
                graph.add_edge(source, total)

        def assign_initials(chunk_metas):
            # A fresh analyzer per scenario; every worker is given the value 24.
            analyzer = GraphAnalyzer(graph, {'w1': 24, 'w2': 24, 'w3': 24})
            initial_keys = analyzer.get_initial_operand_keys()
            return analyzer.calc_operand_assignments(
                initial_keys, input_chunk_metas=chunk_metas)

        # Both scenarios below are expected to yield the same placement.
        expected_workers = {
            '0': 'w1',
            '1': 'w1',
            '2': 'w3',
            '3': 'w3',
            '4': 'w2',
            '5': 'w2',
        }

        def verify(assignments):
            self.assertEqual(len(assignments), 6)
            for op_key, worker in expected_workers.items():
                self.assertEqual(assignments[op_key], worker)

        # assign with partial mismatch (no meta is supplied for key '5')
        partial_mismatch = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w2', )),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w3', ))},
        }

        # explanation of the expected result (as stated by the test author):
        # for '1', all data are in w1, hence assigned to w1
        # '0' assigned to w1 according to connectivity
        # '2' and '3' assigned to w3 according to connectivity
        # '4' assigned to w2 because it has less data, and the slots of w3
        # are used up
        verify(assign_initials(partial_mismatch))

        # assign with full mismatch
        full_mismatch = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w1', 'w2')),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w2', ))},
            '5': {'c50': WorkerMeta(chunk_size=7, workers=('w2', ))},
        }
        verify(assign_initials(full_mismatch))