예제 #1
0
    def _prepare_test_graph(self, session_id, graph_key, mock_workers):
        """Yield ``(pool, graph_ref)`` for a prepared two-output test graph.

        A tileable graph is built from the two pieces returned by
        ``mt.split(a1 + a2, 2)``.  A single-process gevent actor pool is then
        started, the scheduler-side actors are created in it, every address in
        ``mock_workers`` is registered with the resource actor, and the graph
        actor prepares the graph, analyzes it and creates operand actors with
        ``_start=False``.

        The ``yield`` sits inside the pool's ``with`` block, so the pool stays
        open for as long as the consumer holds the yielded pair.
        """
        pool_address = f'127.0.0.1:{get_next_port()}'
        lhs = mt.random.random((100,))
        rhs = mt.random.random((100,))
        total = lhs + rhs
        first_part, second_part = mt.split(total, 2)

        tileable_graph = TileableGraph([first_part.data, second_part.data])
        graph_builder = TileableGraphBuilder(tileable_graph)
        # Only the first item produced by the builder is consumed; its value
        # is discarded.
        next(iter(graph_builder.build()))

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_uid(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(tileable_graph),
                uid=GraphActor.gen_uid(session_id, graph_key))

            # A fresh metadata dict is built for every worker.
            for worker in mock_workers:
                resource_ref.set_worker_meta(
                    worker, {'hardware': {'cpu': 4, 'cpu_total': 4, 'memory': 1600}})

            graph_ref.prepare_graph()
            graph_ref.analyze_graph()
            graph_ref.create_operand_actors(_start=False)

            yield pool, graph_ref
예제 #2
0
    def testEmptyGraph(self, *_):
        """Executing an empty ``DAG`` must end in ``GraphState.SUCCEEDED``.

        Extra positional arguments are accepted and ignored.
        """
        session_id = str(uuid.uuid4())
        pool_address = '127.0.0.1:%d' % get_next_port()

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_uid(session_id))

            # Register two mock workers, each with its own metadata dict.
            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(endpoint, {'hardware': {'cpu_total': 4}})

            graph_key = str(uuid.uuid4())
            empty_graph = serialize_graph(DAG())
            graph_uid = GraphActor.gen_uid(session_id, graph_key)
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, empty_graph, uid=graph_uid)

            graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.SUCCEEDED)
예제 #3
0
    def testAssignerActor(self):
        """Check the first worker returned by ``get_worker_assignments``.

        Two of the three input chunks are registered on the first endpoint and
        one on the second; the test asserts that the first endpoint is the
        first entry of the returned assignments.
        """
        scheduler_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=scheduler_address) as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address],
                uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            chunk_meta_ref = pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())

            first_endpoint = 'localhost:12345'
            second_endpoint = 'localhost:23456'
            worker_meta = {'hardware': {'cpu': 4, 'memory': 4096}}

            def _register_workers():
                for endpoint in (first_endpoint, second_endpoint):
                    resource_ref.set_worker_meta(endpoint, worker_meta)

            # The registration runs in its own greenlet and is awaited before
            # the assigner actor is created.
            gevent.spawn(_register_workers).join()

            assigner_ref = pool.create_actor(
                AssignerActor, uid=AssignerActor.default_name())

            session_id = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()) for _ in range(3)]

            op_info = {
                'op_name': 'test_op',
                'io_meta': {'input_chunks': list(chunk_keys)},
                'output_size': 512,
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0,
                },
            }

            # Chunks 1 and 2 live on the first endpoint, chunk 3 on the second.
            holders = (first_endpoint, first_endpoint, second_endpoint)
            for chunk_key, holder in zip(chunk_keys, holders):
                chunk_meta_ref.set_chunk_meta(
                    session_id, chunk_key, size=512, workers=(holder,))

            assignments = assigner_ref.get_worker_assignments(session_id, op_info)
            self.assertEqual(assignments[0], first_endpoint)
예제 #4
0
            def execute_case():
                """Run the graph with a fake execution actor and stop it mid-run.

                Uses ``pool``, ``session_id``, ``graph_key`` and ``graph`` from
                the enclosing scope, which is not visible here.  Creates the
                scheduler actors, registers two mock workers, starts the operand
                actors and then polls the graph state, requesting a stop once
                about 0.8 seconds have elapsed.

                Raises:
                    SystemError: if no terminal graph state is observed within
                        30 seconds.
                """
                pool.create_actor(ClusterInfoActor,
                                  [pool.cluster_info.address],
                                  uid=ClusterInfoActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())
                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                pool.create_actor(AssignerActor,
                                  uid=AssignerActor.gen_name(session_id))
                graph_ref = pool.create_actor(GraphActor,
                                              session_id,
                                              graph_key,
                                              serialize_graph(graph),
                                              uid=GraphActor.gen_name(
                                                  session_id, graph_key))
                execution_ref = pool.create_actor(FakeExecutionActor, sleep=1)

                # Route OperandActor._get_raw_execution_ref to the fake execution
                # actor.  Setting ``side_effect`` implies the attribute is a mock
                # object -- presumably patched by the enclosing test; confirm there.
                OperandActor._get_raw_execution_ref.side_effect = lambda: execution_ref

                # The same metadata dict is used for both mock workers.
                mock_resource = dict(
                    hardware=dict(cpu=4, cpu_total=4, memory=512))

                def write_mock_meta():
                    resource_ref.set_worker_meta('localhost:12345',
                                                 mock_resource)
                    resource_ref.set_worker_meta('localhost:23456',
                                                 mock_resource)

                # Register the workers in a separate greenlet and wait for it to
                # finish before preparing the graph.
                v = gevent.spawn(write_mock_meta)
                v.join()

                graph_ref.prepare_graph()
                fetched_graph = graph_ref.get_chunk_graph()

                graph_ref.scan_node()
                graph_ref.place_initial_chunks()

                # Collect op keys of chunks without successors.
                # NOTE(review): final_keys is not read again in the visible part
                # of this function -- check whether the enclosing test uses it.
                final_keys = set()
                for c in fetched_graph:
                    if fetched_graph.count_successors(c) == 0:
                        final_keys.add(c.op.key)

                graph_ref.create_operand_actors()
                graph_meta_ref = pool.actor_ref(
                    GraphMetaActor.gen_name(session_id, graph_key))
                start_time = time.time()
                cancel_called = False
                # Poll every 0.1 s until the graph reaches a terminal state.
                while True:
                    gevent.sleep(0.1)
                    # Request the stop exactly once, after ~0.8 s of running.
                    # ``_tell=True`` presumably sends without waiting for a
                    # result -- confirm against the actor API.
                    if not cancel_called and time.time() > start_time + 0.8:
                        cancel_called = True
                        graph_ref.stop_graph(_tell=True)
                    # Hard upper bound so a stuck graph cannot hang the test.
                    if time.time() - start_time > 30:
                        raise SystemError('Wait for execution finish timeout')
                    if graph_meta_ref.get_state() in (GraphState.SUCCEEDED,
                                                      GraphState.FAILED,
                                                      GraphState.CANCELLED):
                        break
예제 #5
0
    def _prepare_test_graph(self, session_id, graph_key, mock_workers):
        """Yield ``(pool, graph_ref)`` for a prepared two-output test graph.

        Both pieces returned by ``mt.split(a1 + a2, 2)`` are built into one
        shared ``DAG``.  A single-process gevent actor pool is started, the
        scheduler-side actors are created, every address in ``mock_workers``
        is registered with the resource actor, and the graph actor prepares
        and analyzes the graph and creates operand actors with
        ``_start=False``.

        The ``yield`` sits inside the pool's ``with`` block, so the pool stays
        open for as long as the consumer holds the yielded pair.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        lhs = mt.random.random((100,))
        rhs = mt.random.random((100,))
        total = lhs + rhs
        first_part, second_part = mt.split(total, 2)

        # Accumulate both outputs into the same graph object.
        shared_graph = DAG()
        for part in (first_part, second_part):
            part.build_graph(graph=shared_graph, compose=False)

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address],
                uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(shared_graph),
                uid=GraphActor.gen_name(session_id, graph_key))

            # A fresh metadata dict is built for every worker.
            for worker in mock_workers:
                resource_ref.set_worker_meta(worker, {'hardware': {'cpu_total': 4}})

            graph_ref.prepare_graph()
            graph_ref.analyze_graph()
            graph_ref.create_operand_actors(_start=False)

            yield pool, graph_ref
예제 #6
0
    def testErrorOnPrepare(self, *_):
        """Check the final graph state when preparation fails or is interrupted.

        Three scenarios run against separate graph actors in one pool:

        1. ``GraphActor.create_operand_actors`` raises ``RuntimeError``; the
           error propagates from ``execute_graph`` and the state is FAILED.
        2. ``GraphActor.create_operand_actors`` sets the graph meta state to
           CANCELLING; the state ends up CANCELLED.
        3. ``GraphAnalyzer.calc_operand_assignments`` sets the graph meta
           state to CANCELLING and returns an empty dict; the state ends up
           CANCELLED.

        Extra positional arguments are accepted and ignored.
        """
        session_id = str(uuid.uuid4())

        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address],
                uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.default_name())

            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(endpoint, {'hardware': {'cpu_total': 4}})

            # Scenario 1: an error is raised inside create_operand_actors.
            graph_key = str(uuid.uuid4())
            tensor = mt.random.random((8, 2), chunk_size=2) + 1
            tileable_graph = tensor.build_graph(compose=False)
            serialized_graph = serialize_graph(tileable_graph)

            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            def _raise_runtime_error(*_, **__):
                raise RuntimeError

            with patch_method(GraphActor.create_operand_actors, new=_raise_runtime_error), \
                    self.assertRaises(RuntimeError):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.FAILED)
            graph_ref.destroy()

            # Scenario 2: cancellation is requested during create_operand_actors.
            graph_key = str(uuid.uuid4())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            def _cancel_in_create(*_, **__):
                # ``graph_key`` is looked up when the patched method runs.
                meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
                meta_ref.set_state(GraphState.CANCELLING)

            with patch_method(GraphActor.create_operand_actors, new=_cancel_in_create):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)

            # Scenario 3: cancellation is requested in an earlier step.
            graph_key = str(uuid.uuid4())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            def _cancel_in_assignment(*_, **__):
                meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
                meta_ref.set_state(GraphState.CANCELLING)
                # The patched analyzer method returns an empty mapping.
                return {}

            with patch_method(GraphAnalyzer.calc_operand_assignments, new=_cancel_in_assignment):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)
예제 #7
0
    def testOperandActorWithCancel(self, *_):
        """Stop a running graph shortly after start and expect CANCELLED.

        The graph for the sum of two random integer tensors is executed
        against fake execution actors (created with ``exec_delay=0.2``).
        About 0.3 seconds after the operand actors are created the graph is
        asked to stop, and the test asserts that the graph meta state ends up
        ``GraphState.CANCELLED``.

        Extra positional arguments are accepted and ignored.

        Raises:
            SystemError: if no terminal state is observed within 30 seconds.
        """
        lhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        rhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        total = lhs + rhs

        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        tileable_graph = total.build_graph(compose=False)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_uid(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(tileable_graph),
                uid=GraphActor.gen_uid(session_id, graph_key))

            def _fake_execution_ref(uid=None, address=None):
                # Create the fake execution actor for ``address`` on first use
                # and return a reference to the existing one afterwards.
                try:
                    return pool.create_actor(
                        FakeExecutionActor, exec_delay=0.2,
                        uid=FakeExecutionActor.gen_uid(address))
                except ActorAlreadyExist:
                    return pool.actor_ref(FakeExecutionActor.gen_uid(address))

            # Setting ``side_effect`` implies ``_get_raw_execution_ref`` is a
            # mock object here.
            OperandActor._get_raw_execution_ref.side_effect = _fake_execution_ref

            mock_resource = {'hardware': {'cpu': 4, 'cpu_total': 4, 'memory': 512}}

            # Twenty mock workers on consecutive ports share one metadata dict.
            for port in range(12345, 12345 + 20):
                resource_ref.set_worker_meta('localhost:%d' % port, mock_resource)

            graph_ref.prepare_graph(compose=False)
            fetched_graph = graph_ref.get_chunk_graph()

            graph_ref.analyze_graph()

            # Op keys of chunks that have no successors.
            final_keys = {
                chunk.op.key for chunk in fetched_graph
                if fetched_graph.count_successors(chunk) == 0
            }

            graph_ref.create_operand_actors()
            graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_uid(session_id, graph_key))
            terminal_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)
            started_at = time.time()
            stop_requested = False
            while True:
                pool.sleep(0.05)
                # Issue the stop request exactly once, after ~0.3 s.
                if not stop_requested and time.time() > started_at + 0.3:
                    stop_requested = True
                    graph_ref.stop_graph(_tell=True)
                if time.time() - started_at > 30:
                    raise SystemError('Wait for execution finish timeout')
                if graph_meta_ref.get_state() in terminal_states:
                    break
            self.assertEqual(graph_meta_ref.get_state(), GraphState.CANCELLED)
예제 #8
0
    def _run_operand_case(session_id, graph_key, tensor, execution_creator):
        """Run ``tensor``'s graph with mock execution actors until it finishes.

        Args:
            session_id: session identifier used to name the actors.
            graph_key: graph identifier used to name the graph actors.
            tensor: object whose ``build_graph(compose=False)`` yields the
                graph to execute.
            execution_creator: callable invoked as
                ``execution_creator(pool, uid)`` to create an execution actor;
                when it raises ``ActorAlreadyExist`` a reference to the
                existing actor is used instead.

        Raises:
            SystemError: if the graph does not reach SUCCEEDED, FAILED or
                CANCELLED within 30 seconds.
        """
        tileable_graph = tensor.build_graph(compose=False)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_uid(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(tileable_graph),
                uid=GraphActor.gen_uid(session_id, graph_key))

            def _mock_execution_ref(uid=None, address=None):
                # Create the execution actor for ``address`` on first use and
                # return a reference to the existing one afterwards.
                try:
                    return execution_creator(
                        pool, FakeExecutionActor.gen_uid(address))
                except ActorAlreadyExist:
                    return pool.actor_ref(FakeExecutionActor.gen_uid(address))

            # Setting ``side_effect`` implies ``_get_raw_execution_ref`` is a
            # mock object here.
            OperandActor._get_raw_execution_ref.side_effect = _mock_execution_ref

            mock_resource = {'hardware': {'cpu': 4, 'cpu_total': 4, 'memory': 512}}

            # Both mock workers share one metadata dict.
            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(endpoint, mock_resource)

            graph_ref.prepare_graph()
            fetched_graph = graph_ref.get_chunk_graph()

            graph_ref.analyze_graph()

            # Op keys of chunks that have no successors.
            final_keys = {
                chunk.op.key for chunk in fetched_graph
                if fetched_graph.count_successors(chunk) == 0
            }

            graph_ref.create_operand_actors()

            graph_meta_ref = pool.actor_ref(
                GraphMetaActor.gen_uid(session_id, graph_key))
            terminal_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)
            started_at = time.time()
            # Poll every 0.1 s until a terminal state shows up.
            while True:
                pool.sleep(0.1)
                if time.time() - started_at > 30:
                    raise SystemError('Wait for execution finish timeout')
                if graph_meta_ref.get_state() in terminal_states:
                    break
예제 #9
0
            def execute_case():
                """Run the graph with a caller-supplied execution actor until done.

                Uses ``pool``, ``session_id``, ``graph_key``, ``graph`` and
                ``execution_creator`` from the enclosing scope, which is not
                visible here.  Creates the scheduler actors (with a KV store
                actor), registers two mock workers, starts the operand actors
                and polls the KV store for the graph state.

                Raises:
                    SystemError: if the stored state is not one of
                        'succeeded', 'failed' or 'cancelled' within 30 seconds.
                """
                pool.create_actor(ClusterInfoActor,
                                  [pool.cluster_info.address],
                                  uid=ClusterInfoActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())
                kv_store_ref = pool.create_actor(
                    KVStoreActor, uid=KVStoreActor.default_name())
                pool.create_actor(AssignerActor,
                                  uid=AssignerActor.gen_name(session_id))
                graph_ref = pool.create_actor(GraphActor,
                                              session_id,
                                              graph_key,
                                              serialize_graph(graph),
                                              uid=GraphActor.gen_name(
                                                  session_id, graph_key))
                execution_ref = execution_creator(pool)

                # Route OperandActor._get_raw_execution_ref to the created
                # execution actor.  Setting ``side_effect`` implies the attribute
                # is a mock object -- presumably patched by the enclosing test.
                OperandActor._get_raw_execution_ref.side_effect = lambda: execution_ref

                # The same metadata dict is used for both mock workers.
                mock_resource = dict(
                    hardware=dict(cpu=4, cpu_total=4, memory=512))

                def write_mock_meta():
                    resource_ref.set_worker_meta('localhost:12345',
                                                 mock_resource)
                    resource_ref.set_worker_meta('localhost:23456',
                                                 mock_resource)

                # Register the workers in a separate greenlet and wait for it to
                # finish before preparing the graph.
                v = gevent.spawn(write_mock_meta)
                v.join()

                graph_ref.prepare_graph()
                # Read the serialized chunk graph back from the KV store.
                graph_data = kv_store_ref.read(
                    '/sessions/%s/graphs/%s/chunk_graph' %
                    (session_id, graph_key)).value
                fetched_graph = deserialize_graph(graph_data)

                graph_ref.scan_node()
                graph_ref.place_initial_chunks()

                # Collect op keys of chunks without successors.
                # NOTE(review): final_keys is not read again in the visible part
                # of this function -- check whether the enclosing test uses it.
                final_keys = set()
                for c in fetched_graph:
                    if fetched_graph.count_successors(c) == 0:
                        final_keys.add(c.op.key)

                graph_ref.create_operand_actors()
                start_time = time.time()
                # Poll every 0.1 s until the stored state is terminal.
                while True:
                    gevent.sleep(0.1)
                    # Hard upper bound so a stuck graph cannot hang the test.
                    if time.time() - start_time > 30:
                        raise SystemError('Wait for execution finish timeout')
                    # NOTE(review): this key uses '/graph/' while the chunk graph
                    # above is read from '/graphs/' -- confirm both paths match
                    # what the graph actor actually writes.
                    if kv_store_ref.read('/sessions/%s/graph/%s/state' % (session_id, graph_key)).value.lower() \
                            in ('succeeded', 'failed', 'cancelled'):
                        break
예제 #10
0
파일: test_assigner.py 프로젝트: zvrr/mars
    def testAssignerActor(self):
        """Check the worker delivered by ``AssignerActor.apply_for_resource``.

        Two of the three input chunks are registered on ``endpoint1`` and one
        on ``endpoint2``.  The test applies for a resource with a reply
        callback, waits for the reply actor to receive a value and asserts
        that the first element of the returned value is ``endpoint1``.

        The wait is bounded: if no reply arrives within 30 seconds the test
        fails instead of polling forever.
        """
        # Imported locally so the deadline below does not rely on a
        # module-level import.
        import time

        reply_timeout = 30  # seconds to wait for the assigner's reply

        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                                                 uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, memory=4096))

            def write_mock_meta():
                resource_ref.set_worker_meta(endpoint1, res)
                resource_ref.set_worker_meta(endpoint2, res)

            # Register the workers in a separate greenlet and wait for it to
            # finish before the assigner actor is created.
            g = gevent.spawn(write_mock_meta)
            g.join()

            assigner_ref = pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_key1 = str(uuid.uuid4())
            chunk_key2 = str(uuid.uuid4())
            chunk_key3 = str(uuid.uuid4())

            op_info = {
                'op_name': 'test_op',
                'io_meta': dict(input_chunks=[chunk_key1, chunk_key2, chunk_key3]),
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0
                }
            }

            # Chunks 1 and 2 live on endpoint1, chunk 3 on endpoint2.
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_meta_client.set_chunk_meta(session_id, chunk_key1, size=512, workers=(endpoint1,))
            chunk_meta_client.set_chunk_meta(session_id, chunk_key2, size=512, workers=(endpoint1,))
            chunk_meta_client.set_chunk_meta(session_id, chunk_key3, size=512, workers=(endpoint2,))

            reply_ref = pool.create_actor(PromiseReplyTestActor)
            reply_callback = ((reply_ref.uid, reply_ref.address), 'reply')
            assigner_ref.apply_for_resource(session_id, op_key, op_info, callback=reply_callback)

            # Poll for the reply, but give up after ``reply_timeout`` seconds so
            # a missing callback fails the test rather than hanging the run.
            deadline = time.time() + reply_timeout
            reply = reply_ref.get_reply()
            while not reply:
                if time.time() > deadline:
                    self.fail('Wait for assigner reply timeout')
                gevent.sleep(0.1)
                reply = reply_ref.get_reply()
            _, ret_value = reply
            self.assertEqual(ret_value[0], endpoint1)
예제 #11
0
    def testFailoverMessage(self):
        """Check worker-loss notification and re-registration of a dead worker.

        A mock worker is registered with an ``update_time`` older than
        ``options.scheduler.status_timeout`` and dead-worker detection is
        triggered.  The test then asserts that

        * the (mocked) graph actor was told about the removed worker and the
          chunk that lived on it,
        * the worker is absent from the resource actor's metadata and an
          immediate re-registration is not accepted, and
        * a re-registration after a further 0.4 s sleep is accepted.

        ``options.scheduler.worker_blacklist_time`` is set to 0.5 for the
        duration of the test and restored afterwards so the change does not
        leak into other tests.
        """
        mock_session_id = str(uuid.uuid4())
        mock_graph_key = str(uuid.uuid4())
        mock_chunk_key = str(uuid.uuid4())
        addr = '127.0.0.1:%d' % get_next_port()
        mock_worker_addr = '127.0.0.1:54132'

        # ``options`` is process-wide state: remember the previous value and
        # put it back in ``finally`` even when an assertion fails.
        old_blacklist_time = options.scheduler.worker_blacklist_time
        options.scheduler.worker_blacklist_time = 0.5
        try:
            with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
                pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
                session_manager_ref = pool.create_actor(
                    SessionManagerActor, uid=SessionManagerActor.default_name())
                resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
                chunk_meta_ref = pool.create_actor(
                    ChunkMetaActor, uid=ChunkMetaActor.default_name())

                session_ref = pool.actor_ref(session_manager_ref.create_session(mock_session_id))
                chunk_meta_ref.set_chunk_meta(mock_session_id, mock_chunk_key,
                                              size=80, shape=(10,), workers=(mock_worker_addr,))

                # Replace GraphActor with the mock while the graph is submitted.
                with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__, new=MockGraphActor):
                    session_ref.submit_tensor_graph(None, mock_graph_key)
                    graph_ref = pool.actor_ref(GraphActor.gen_name(mock_session_id, mock_graph_key))

                    # Register the worker with a timestamp already past the
                    # status timeout, then trigger dead-worker detection.
                    expire_time = time.time() - options.scheduler.status_timeout - 1
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=expire_time))

                    resource_ref.detect_dead_workers(_tell=True)
                    pool.sleep(0.2)

                    _, removes, lost_chunks = graph_ref.get_worker_change_args()
                    self.assertListEqual(removes, [mock_worker_addr])
                    self.assertListEqual(lost_chunks, [mock_chunk_key])

                    # Right after removal a fresh heartbeat is not accepted --
                    # presumably because the worker is still blacklisted.
                    self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                    self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())

                    # 0.2 s + 0.4 s of sleeping exceeds the 0.5 s configured
                    # above, after which the worker is accepted again.
                    pool.sleep(0.4)
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                    self.assertIn(mock_worker_addr, resource_ref.get_workers_meta())
        finally:
            options.scheduler.worker_blacklist_time = old_blacklist_time
예제 #12
0
    def prepare_graph_in_pool(self, expr, clean_io_meta=True, compose=False):
        """Prepare ``expr``'s graph in a fresh actor pool, then yield it.

        The graph is prepared, analyzed (first without placement, then again
        after two mock workers are registered) and operand actors are
        created, with assertions on the operand info after each step.

        Args:
            expr: object whose ``build_graph`` produces the graph under test.
            clean_io_meta: forwarded as ``_clean_info`` to
                ``create_operand_actors``; when false, the operand ``io_meta``
                is additionally compared against values recomputed from the
                chunk graph.
            compose: forwarded to ``build_graph`` and ``prepare_graph``.

        Yields:
            ``(pool, graph_ref)`` while the pool is still open.
        """
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        tileable_graph = expr.build_graph(compose=compose)
        serialized_graph = serialize_graph(tileable_graph)
        chunked_graph = expr.build_graph(compose=compose, tiled=True)

        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_uid(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_uid(session_id, graph_key))

            # The chunk graph held by the actor must match a local tiling.
            graph_ref.prepare_graph(compose=compose)
            fetched_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(fetched_graph)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            # Analysis without placement fills in the optimization figures.
            graph_ref.analyze_graph(do_placement=False)
            op_infos = graph_ref.get_operand_info()
            for node in fetched_graph:
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(op_infos[node.op.key]['optimize'][field])

            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(endpoint, {'hardware': {'cpu_total': 4}})

            # With workers available, nodes without predecessors get a target.
            graph_ref.analyze_graph()
            op_infos = graph_ref.get_operand_info()

            for node in fetched_graph:
                if fetched_graph.count_predecessors(node) == 0:
                    self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

            graph_ref.create_operand_actors(_clean_info=clean_io_meta)
            op_infos = graph_ref.get_operand_info()

            if not clean_io_meta:
                # Recompute the expected io_meta per operand key; several
                # chunks may share one key, so entries are merged.
                expected_metas = {}
                for node in fetched_graph:
                    meta = expected_metas.setdefault(node.op.key, {
                        'predecessors': set(),
                        'successors': set(),
                        'input_chunks': set(),
                        'chunks': set(),
                    })
                    for pred in fetched_graph.iter_predecessors(node):
                        meta['predecessors'].add(pred.op.key)
                        meta['input_chunks'].add(pred.key)
                    for succ in fetched_graph.iter_successors(node):
                        meta['successors'].add(succ.op.key)
                    for output in node.op.outputs:
                        meta['chunks'].add(output.key)

                io_fields = ('predecessors', 'successors', 'input_chunks', 'chunks')
                for node in fetched_graph:
                    info = op_infos[node.op.key]
                    self.assertEqual(info['op_name'], type(node.op).__name__)

                    actual_meta = info['io_meta']
                    expected_meta = expected_metas[node.op.key]
                    for field in io_fields:
                        self.assertSetEqual(set(actual_meta[field]),
                                            set(expected_meta[field]))

            yield pool, graph_ref
예제 #13
0
    def testGraphActor(self):
        # Drives a GraphActor through prepare_graph -> scan_node ->
        # place_initial_chunks -> create_operand_actors inside a single-process
        # gevent actor pool, asserting on the operand info reported after
        # each stage.
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        lhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        rhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        total = lhs + rhs

        # The serialized (untiled) graph is handed to the actor; the tiled
        # graph built here is only used as a size reference below.
        serialized_graph = serialize_graph(total.build_graph(compose=False))
        chunked_graph = total.build_graph(compose=False, tiled=True)

        addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address],
                uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_name(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            graph_ref.prepare_graph(compose=False)
            fetched_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(fetched_graph)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            # After scanning, every operand must carry all three optimize
            # statistics.
            graph_ref.scan_node()
            op_infos = graph_ref.get_operand_info()
            for node in fetched_graph:
                optimize_info = op_infos[node.op.key]['optimize']
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(optimize_info[field])

            # Register two mock workers; a fresh meta dict is built per call.
            for endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    endpoint, dict(hardware=dict(cpu_total=4)))

            graph_ref.place_initial_chunks()
            op_infos = graph_ref.get_operand_info()

            # Only nodes without predecessors are checked for a target worker.
            for node in fetched_graph:
                if fetched_graph.count_predecessors(node) == 0:
                    self.assertIsNotNone(
                        op_infos[node.op.key]['target_worker'])

            graph_ref.create_operand_actors(_clean_io_meta=False)
            op_infos = graph_ref.get_operand_info()

            for node in fetched_graph:
                self.assertEqual(op_infos[node.op.key]['op_name'],
                                 type(node.op).__name__)

                io_meta = op_infos[node.op.key]['io_meta']
                # Expected IO metadata, derived from the fetched chunk graph.
                expected = {
                    'predecessors': {
                        pn.op.key
                        for pn in fetched_graph.iter_predecessors(node)},
                    'successors': {
                        sn.op.key
                        for sn in fetched_graph.iter_successors(node)},
                    'input_chunks': {
                        pn.key
                        for pn in fetched_graph.iter_predecessors(node)},
                    'chunks': {c.key for c in node.op.outputs},
                }
                for field in ('predecessors', 'successors',
                              'input_chunks', 'chunks'):
                    self.assertSetEqual(set(io_meta[field]), expected[field])

                self.assertEqual(op_infos[node.op.key]['output_size'],
                                 sum(ch.nbytes for ch in node.op.outputs))
예제 #14
0
    def testSameKey(self, *_):
        # Runs the GraphActor pipeline on an array concatenated with itself
        # (presumably so that the chunk graph contains repeated keys, per the
        # test name -- confirm against GraphActor), reading the chunk graph
        # back from the KV store and checking per-operand info at each stage.
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        ones = mt.ones((5, 5), chunks=3)
        doubled = mt.concatenate((ones, ones))

        serialized_graph = serialize_graph(doubled.build_graph(compose=False))
        chunked_graph = doubled.build_graph(compose=False, tiled=True)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address],
                uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            kv_store_ref = pool.create_actor(
                KVStoreActor, uid=KVStoreActor.default_name())
            pool.create_actor(
                AssignerActor, uid=AssignerActor.gen_name(session_id))
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            graph_ref.prepare_graph(compose=False)
            # The chunk graph is fetched from the KV store and deserialized.
            graph_path = '/sessions/%s/graphs/%s/chunk_graph' % (
                session_id, graph_key)
            graph_data = kv_store_ref.read(graph_path).value
            self.assertIsNotNone(graph_data)
            fetched_graph = deserialize_graph(graph_data)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            # After scanning, every operand must carry all three optimize
            # statistics.
            graph_ref.scan_node()
            op_infos = graph_ref.get_operand_info()
            for node in fetched_graph:
                optimize_info = op_infos[node.op.key]['optimize']
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(optimize_info[field])

            def register_mock_workers():
                # Register two mock workers; a fresh meta dict per call.
                for endpoint in ('localhost:12345', 'localhost:23456'):
                    resource_ref.set_worker_meta(
                        endpoint, dict(hardware=dict(cpu_total=4)))

            # Worker registration runs in its own greenlet and is awaited
            # before chunk placement.
            gevent.spawn(register_mock_workers).join()

            graph_ref.place_initial_chunks()
            op_infos = graph_ref.get_operand_info()

            # Only nodes without predecessors are checked for a target worker.
            for node in fetched_graph:
                if fetched_graph.count_predecessors(node) == 0:
                    self.assertIsNotNone(
                        op_infos[node.op.key]['target_worker'])

            graph_ref.create_operand_actors()
            op_infos = graph_ref.get_operand_info()

            for node in fetched_graph:
                self.assertEqual(op_infos[node.op.key]['op_name'],
                                 type(node.op).__name__)

                io_meta = op_infos[node.op.key]['io_meta']
                # Expected IO metadata, derived from the fetched chunk graph.
                expected = {
                    'predecessors': {
                        pn.op.key
                        for pn in fetched_graph.iter_predecessors(node)},
                    'successors': {
                        sn.op.key
                        for sn in fetched_graph.iter_successors(node)},
                    'input_chunks': {
                        pn.key
                        for pn in fetched_graph.iter_predecessors(node)},
                    'chunks': {c.key for c in node.op.outputs},
                }
                for field in ('predecessors', 'successors',
                              'input_chunks', 'chunks'):
                    self.assertSetEqual(set(io_meta[field]), expected[field])

                self.assertEqual(op_infos[node.op.key]['output_size'],
                                 sum(ch.nbytes for ch in node.op.outputs))
예제 #15
0
    def testAssignerActor(self, *_):
        # Checks two behaviours of AssignerActor inside a single-process
        # gevent actor pool:
        #   1. with two workers registered and two of the three input chunks
        #      located on endpoint1, the operand is assigned to endpoint1;
        #   2. once both workers report mem_quota=0 and the assign timeout is
        #      lowered to 1, apply_for_resource raises TimeoutError.
        mock_scheduler_addr = f'127.0.0.1:{get_next_port()}'
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, mem_quota=4096))

            resource_ref.set_worker_meta(endpoint1, res)
            resource_ref.set_worker_meta(endpoint2, res)

            assigner_ref = pool.create_actor(AssignerActor,
                                             uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_key1 = str(uuid.uuid4())
            chunk_key2 = str(uuid.uuid4())
            chunk_key3 = str(uuid.uuid4())

            op_info = {
                'op_name': 'test_op',
                'io_meta':
                dict(input_chunks=[chunk_key1, chunk_key2, chunk_key3]),
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0
                }
            }

            # Two equally sized chunks live on endpoint1 and one on endpoint2.
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_locations = (
                (chunk_key1, endpoint1),
                (chunk_key2, endpoint1),
                (chunk_key3, endpoint2),
            )
            for chunk_key, endpoint in chunk_locations:
                chunk_meta_client.set_chunk_meta(session_id,
                                                 chunk_key,
                                                 size=512,
                                                 workers=(endpoint, ))

            # The mock operand actor is created under the uid generated for
            # (session_id, op_key); the chosen worker endpoint is read back
            # from it below.
            uid = OperandActor.gen_uid(session_id, op_key)
            reply_ref = pool.create_actor(MockOperandActor, uid=uid)
            assigner_ref.apply_for_resource(session_id, op_key, op_info)

            # NOTE(review): this poll has no upper bound; if no worker is ever
            # reported the test hangs instead of failing.
            while not reply_ref.get_worker_ep():
                gevent.sleep(0.1)
            self.assertEqual(reply_ref.get_worker_ep(), endpoint1)

            with self.run_actor_test(pool) as test_actor, self.assertRaises(
                    TimeoutError):
                assigner_p_ref = test_actor.promise_ref(assigner_ref)

                # Remember the value currently configured so the global option
                # is restored exactly, rather than reset to a hard-coded number
                # that may differ from what the rest of the suite runs with.
                prev_assign_timeout = options.scheduler.assign_timeout
                try:
                    options.scheduler.assign_timeout = 1
                    exhausted_res = dict(hardware=dict(cpu=4, mem_quota=0))
                    resource_ref.set_worker_meta(endpoint1, exhausted_res)
                    resource_ref.set_worker_meta(endpoint2, exhausted_res)

                    self.waitp(
                        assigner_p_ref.apply_for_resource(session_id,
                                                          op_key,
                                                          op_info,
                                                          _promise=True))
                finally:
                    options.scheduler.assign_timeout = prev_assign_timeout