Beispiel #1
0
    def testEmptyGraph(self, *_):
        """Execute a freshly constructed ``DAG()`` and expect ``SUCCEEDED``.

        A single-process gevent actor pool is populated with the cluster-info,
        resource, chunk-meta and assigner actors (each under its default uid),
        metadata for two worker endpoints is recorded, and a GraphActor built
        from the serialized empty graph is executed.
        """
        session_id = str(uuid.uuid4())
        pool_address = '127.0.0.1:%d' % get_next_port()

        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address) as pool:
            # supporting actors the graph actor relies on
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            for actor_cls in (ChunkMetaActor, AssignerActor):
                pool.create_actor(actor_cls, uid=actor_cls.default_uid())

            # two worker endpoints with the same hardware description
            for worker_ep in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    worker_ep, dict(hardware=dict(cpu_total=4)))

            graph_key = str(uuid.uuid4())
            empty_graph = serialize_graph(DAG())
            graph_uid = GraphActor.gen_uid(session_id, graph_key)

            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, empty_graph, uid=graph_uid)
            graph_ref.execute_graph()

            final_state = graph_ref.get_state()
            self.assertEqual(final_state, GraphState.SUCCEEDED)
Beispiel #2
0
    def testErrorOnPrepare(self, *_):
        """Check the graph state after an error or a cancellation during execution.

        Three scenarios run against fresh GraphActors sharing one serialized graph:

        * ``GraphActor.create_operand_actors`` raises ``RuntimeError``: the error
          propagates out of ``execute_graph`` and the state becomes ``FAILED``;
        * ``GraphActor.create_operand_actors`` sets the graph meta state to
          ``CANCELLING``: the state ends up ``CANCELLED``;
        * ``GraphAnalyzer.calc_operand_assignments`` sets the state to
          ``CANCELLING`` and returns an empty dict: the state ends up ``CANCELLED``.
        """
        session_id = str(uuid.uuid4())
        pool_address = '127.0.0.1:%d' % get_next_port()

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                              uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            for actor_cls in (ChunkMetaActor, AssignerActor):
                pool.create_actor(actor_cls, uid=actor_cls.default_uid())

            for worker_ep in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker_ep, dict(hardware=dict(cpu_total=4)))

            def _spawn_graph(key):
                # every scenario gets its own GraphActor over the same serialized graph
                return pool.create_actor(GraphActor, session_id, key, serialized_graph,
                                         uid=GraphActor.gen_uid(session_id, key))

            def _request_cancel(key):
                # flip the graph's meta state so the running graph sees a cancellation
                meta_ref = pool.actor_ref(GraphMetaActor.gen_uid(session_id, key))
                meta_ref.set_state(GraphState.CANCELLING)

            # scenario 1: error occurred in create_operand_actors
            failing_key = str(uuid.uuid4())
            tensor = mt.random.random((8, 2), chunk_size=2) + 1
            serialized_graph = serialize_graph(tensor.build_graph(compose=False))
            failing_ref = _spawn_graph(failing_key)

            def _raise_runtime_error(*_, **__):
                raise RuntimeError

            with patch_method(GraphActor.create_operand_actors, new=_raise_runtime_error):
                with self.assertRaises(RuntimeError):
                    failing_ref.execute_graph()
            self.assertEqual(failing_ref.get_state(), GraphState.FAILED)
            failing_ref.destroy()

            # scenario 2: interrupted during create_operand_actors
            create_cancel_key = str(uuid.uuid4())
            create_cancel_ref = _spawn_graph(create_cancel_key)

            def _cancel_while_creating(*_, **__):
                _request_cancel(create_cancel_key)

            with patch_method(GraphActor.create_operand_actors, new=_cancel_while_creating):
                create_cancel_ref.execute_graph()
            self.assertEqual(create_cancel_ref.get_state(), GraphState.CANCELLED)

            # scenario 3: interrupted during previous steps
            analyze_cancel_key = str(uuid.uuid4())
            analyze_cancel_ref = _spawn_graph(analyze_cancel_key)

            def _cancel_while_assigning(*_, **__):
                _request_cancel(analyze_cancel_key)
                return dict()

            with patch_method(GraphAnalyzer.calc_operand_assignments, new=_cancel_while_assigning):
                analyze_cancel_ref.execute_graph()
            self.assertEqual(analyze_cancel_ref.get_state(), GraphState.CANCELLED)
Beispiel #3
0
    def testAssignerActor(self):
        """Apply for resources through AssignerActor and check the replied worker.

        Two worker endpoints are registered with the same resource description.
        Of the three input chunks, two are recorded on ``endpoint1`` and one on
        ``endpoint2``. The first element of the value replied to the
        PromiseReplyTestActor is expected to be ``endpoint1``.
        """
        scheduler_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=scheduler_address) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            worker_res = {'hardware': {'cpu': 4, 'memory': 4096}}

            def _register_workers():
                for endpoint in (endpoint1, endpoint2):
                    resource_ref.set_worker_meta(endpoint, worker_res)

            # write the worker metadata from a separate greenlet and wait for it
            gevent.spawn(_register_workers).join()

            assigner_ref = pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()) for _ in range(3)]

            optimize_info = {
                'depth': 0,
                'demand_depths': (),
                'successor_size': 1,
                'descendant_size': 0,
            }
            op_info = {
                'op_name': 'test_op',
                'io_meta': {'input_chunks': list(chunk_keys)},
                'retries': 0,
                'optimize': optimize_info,
            }

            # chunks 1 and 2 are recorded on endpoint1, chunk 3 on endpoint2
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_locations = zip(chunk_keys, (endpoint1, endpoint1, endpoint2))
            for chunk_key, endpoint in chunk_locations:
                chunk_meta_client.set_chunk_meta(session_id, chunk_key, size=512, workers=(endpoint,))

            reply_ref = pool.create_actor(PromiseReplyTestActor)
            callback = ((reply_ref.uid, reply_ref.address), 'reply')
            assigner_ref.apply_for_resource(session_id, op_key, op_info, callback=callback)

            # poll until the reply actor has recorded something
            while True:
                if reply_ref.get_reply():
                    break
                gevent.sleep(0.1)

            _, ret_value = reply_ref.get_reply()
            self.assertEqual(ret_value[0], endpoint1)
Beispiel #4
0
    def testAssignerActor(self, *_):
        """Check worker assignment and the assignment timeout of AssignerActor.

        Part one: two workers report ``mem_quota=4096``; two of the three input
        chunks are recorded on ``endpoint1`` and one on ``endpoint2``. The
        MockOperandActor registered under the operand's uid is expected to
        report ``endpoint1`` from ``get_worker_ep()``.

        Part two: both workers are updated to ``mem_quota=0`` and
        ``options.scheduler.assign_timeout`` is lowered to 1; applying for
        resources through a promise is then expected to raise ``TimeoutError``.
        The previous timeout setting is restored afterwards.
        """
        mock_scheduler_addr = f'127.0.0.1:{get_next_port()}'
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, mem_quota=4096))

            resource_ref.set_worker_meta(endpoint1, res)
            resource_ref.set_worker_meta(endpoint2, res)

            assigner_ref = pool.create_actor(AssignerActor,
                                             uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_key1 = str(uuid.uuid4())
            chunk_key2 = str(uuid.uuid4())
            chunk_key3 = str(uuid.uuid4())

            op_info = {
                'op_name': 'test_op',
                'io_meta':
                dict(input_chunks=[chunk_key1, chunk_key2, chunk_key3]),
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0
                }
            }

            # chunks 1 and 2 are recorded on endpoint1, chunk 3 on endpoint2
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key1,
                                             size=512,
                                             workers=(endpoint1, ))
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key2,
                                             size=512,
                                             workers=(endpoint1, ))
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key3,
                                             size=512,
                                             workers=(endpoint2, ))

            # the mock operand actor lives under the uid generated for this operand
            uid = OperandActor.gen_uid(session_id, op_key)
            reply_ref = pool.create_actor(MockOperandActor, uid=uid)
            assigner_ref.apply_for_resource(session_id, op_key, op_info)

            # poll until the mock operand actor reports a worker endpoint
            while not reply_ref.get_worker_ep():
                gevent.sleep(0.1)
            self.assertEqual(reply_ref.get_worker_ep(), endpoint1)

            with self.run_actor_test(pool) as test_actor, self.assertRaises(
                    TimeoutError):
                assigner_p_ref = test_actor.promise_ref(assigner_ref)

                # Remember the configured timeout so it can be restored exactly;
                # resetting to a hard-coded number would leak a changed setting
                # into later tests whenever the configured value differs.
                previous_timeout = options.scheduler.assign_timeout
                try:
                    options.scheduler.assign_timeout = 1
                    exhausted_res = dict(hardware=dict(cpu=4, mem_quota=0))
                    resource_ref.set_worker_meta(endpoint1, exhausted_res)
                    resource_ref.set_worker_meta(endpoint2, exhausted_res)

                    self.waitp(
                        assigner_p_ref.apply_for_resource(session_id,
                                                          op_key,
                                                          op_info,
                                                          _promise=True))
                finally:
                    options.scheduler.assign_timeout = previous_timeout
Beispiel #5
0
    def prepare_graph_in_pool(self, expr, clean_io_meta=True, compose=False):
        """Prepare and analyze ``expr``'s graph in a fresh actor pool, then yield it.

        This is a generator: after preparing the graph, analyzing it twice
        (first with ``do_placement=False``, then with default arguments once two
        workers are registered) and creating the operand actors, it yields
        ``(pool, graph_ref)`` while the actor pool is still open. Assertions on
        the intermediate results are made along the way.

        NOTE(review): presumably wrapped by ``contextlib.contextmanager`` via a
        decorator outside this view -- confirm.

        :param expr: object whose ``build_graph`` produces the graph to schedule
        :param clean_io_meta: forwarded to ``create_operand_actors`` as
            ``_clean_info``; when false, each operand's ``io_meta`` is compared
            with the structure of the fetched chunk graph
        :param compose: forwarded to ``build_graph`` and ``prepare_graph``
        """
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        serialized_graph = serialize_graph(expr.build_graph(compose=compose))
        # locally tiled graph, used only to cross-check the size of the fetched one
        expected_chunk_graph = expr.build_graph(compose=compose, tiled=True)

        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                              uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            for actor_cls in (ChunkMetaActor, AssignerActor):
                pool.create_actor(actor_cls, uid=actor_cls.default_uid())
            graph_ref = pool.create_actor(GraphActor, session_id, graph_key, serialized_graph,
                                          uid=GraphActor.gen_uid(session_id, graph_key))

            graph_ref.prepare_graph(compose=compose)
            fetched_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(fetched_graph)
            self.assertEqual(len(expected_chunk_graph), len(fetched_graph))

            # analysis without placement: every operand must carry its optimize fields
            graph_ref.analyze_graph(do_placement=False)
            op_infos = graph_ref.get_operand_info()
            for node in fetched_graph:
                optimize_info = op_infos[node.op.key]['optimize']
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(optimize_info[field])

            for worker_ep in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker_ep, dict(hardware=dict(cpu_total=4)))

            graph_ref.analyze_graph()
            op_infos = graph_ref.get_operand_info()

            # only nodes without predecessors are checked for a target worker
            for node in fetched_graph:
                if fetched_graph.count_predecessors(node) == 0:
                    self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

            graph_ref.create_operand_actors(_clean_info=clean_io_meta)
            op_infos = graph_ref.get_operand_info()

            if not clean_io_meta:
                meta_fields = ('predecessors', 'successors', 'input_chunks', 'chunks')

                # rebuild the expected io_meta per operand key from the chunk graph
                expected_metas = dict()
                for node in fetched_graph:
                    op_key = node.op.key
                    if op_key not in expected_metas:
                        expected_metas[op_key] = {name: set() for name in meta_fields}
                    expected = expected_metas[op_key]

                    predecessors = list(fetched_graph.iter_predecessors(node))
                    successors = list(fetched_graph.iter_successors(node))
                    expected['predecessors'].update(p.op.key for p in predecessors)
                    expected['successors'].update(s.op.key for s in successors)
                    expected['input_chunks'].update(p.key for p in predecessors)
                    expected['chunks'].update(out.key for out in node.op.outputs)

                for node in fetched_graph:
                    op_info = op_infos[node.op.key]
                    self.assertEqual(op_info['op_name'], type(node.op).__name__)

                    io_meta = op_info['io_meta']
                    expected = expected_metas[node.op.key]
                    for name in meta_fields:
                        self.assertSetEqual(set(io_meta[name]), set(expected[name]))

            yield pool, graph_ref