def test_single_cpu_cancel(shutdown_only, use_force):
    """Cancellation propagates through a dependency chain on a 1-CPU cluster.

    Builds head -> mid -> tail plus one independent task, cancels the tail
    and then the head, and checks every chained task raises while the
    independent task still completes once the signal is sent.
    """
    ray.init(num_cpus=1)
    signaler = SignalActor.remote()

    @ray.remote
    def wait_for(t):
        return ray.get(t[0])

    # Three-task chain, all ultimately blocked on the signal actor.
    head = wait_for.remote([signaler.wait.remote()])
    mid = wait_for.remote([head])
    tail = wait_for.remote([mid])
    # Unrelated task that must survive the cancellations below.
    indep = wait_for.remote([signaler.wait.remote()])

    assert not ray.wait([tail], timeout=0.1)[0]
    ray.cancel(tail, force=use_force)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(tail)

    # Cancelling the head must also fail its downstream dependent.
    ray.cancel(head, force=use_force)
    for ref in (head, mid):
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(ref)

    signaler.send.remote()
    ray.get(indep)
async def test_job_runs_with_no_resources_available(job_manager):
    """A submitted job starts even when the cluster has no free CPUs.

    Occupies every available CPU with a hanging task, submits a job, and
    verifies the job reaches the running state before any resources free
    up; the job then succeeds once the hanging task is released.
    """
    script_path = _driver_script_path("consume_one_cpu.py")

    hang_signal_actor = SignalActor.remote()

    # Claims every currently-available CPU and blocks until signalled.
    @ray.remote(num_cpus=ray.available_resources()["CPU"])
    def consume_all_cpus():
        ray.get(hang_signal_actor.wait.remote())

    # Start a hanging task that consumes all CPUs.
    hanging_ref = consume_all_cpus.remote()

    try:
        # Check that the job starts up properly even with no CPUs available.
        # The job won't exit until it has a CPU available because it waits for
        # a task.
        job_id = job_manager.submit_job(entrypoint=f"python {script_path}")
        await async_wait_for_condition(
            check_job_running, job_manager=job_manager, job_id=job_id)
        await async_wait_for_condition(
            lambda: "Hanging..." in job_manager.get_job_logs(job_id))

        # Signal the hanging task to exit and release its CPUs.
        ray.get(hang_signal_actor.send.remote())

        # Check the job succeeds now that resources are available.
        await async_wait_for_condition(
            check_job_succeeded, job_manager=job_manager, job_id=job_id)
        await async_wait_for_condition(
            lambda: "Success!" in job_manager.get_job_logs(job_id))
    finally:
        # Just in case the test fails.
        ray.cancel(hanging_ref)
def _apply( self, fn: Any, remote_args: dict, block_list: BlockList, clear_input_blocks: bool, ) -> BlockList: context = DatasetContext.get_current() # Handle empty datasets. if block_list.initial_num_blocks() == 0: return block_list blocks = block_list.get_blocks_with_metadata() map_bar = ProgressBar("Map Progress", total=len(blocks)) if context.block_splitting_enabled: map_block = cached_remote_fn(_map_block_split).options(**remote_args) refs = [map_block.remote(b, fn, m.input_files) for b, m in blocks] else: map_block = cached_remote_fn(_map_block_nosplit).options( **dict(remote_args, num_returns=2) ) all_refs = [map_block.remote(b, fn, m.input_files) for b, m in blocks] data_refs = [r[0] for r in all_refs] refs = [r[1] for r in all_refs] # Release input block references. if clear_input_blocks: del blocks block_list.clear() # Common wait for non-data refs. try: results = map_bar.fetch_until_complete(refs) except (ray.exceptions.RayTaskError, KeyboardInterrupt) as e: # One or more mapper tasks failed, or we received a SIGINT signal # while waiting; either way, we cancel all map tasks. for ref in refs: ray.cancel(ref) # Wait until all tasks have failed or been cancelled. for ref in refs: try: ray.get(ref) except (ray.exceptions.RayTaskError, ray.exceptions.TaskCancelledError): pass # Reraise the original task failure exception. raise e from None new_blocks, new_metadata = [], [] if context.block_splitting_enabled: for result in results: for block, metadata in result: new_blocks.append(block) new_metadata.append(metadata) else: for block, metadata in zip(data_refs, results): new_blocks.append(block) new_metadata.append(metadata) return BlockList(list(new_blocks), list(new_metadata))
def test_comprehensive(ray_start_regular, use_force):
    """Cancelling one input of a multi-dependency task fails its dependents."""
    signaler = SignalActor.remote()

    @ray.remote
    def wait_for(t):
        ray.get(t[0])
        return "Result"

    @ray.remote
    def combine(a, b):
        return str(a) + str(b)

    left = wait_for.remote([signaler.wait.remote()])
    right = wait_for.remote([signaler.wait.remote()])
    combo = combine.remote(left, right)
    chained = wait_for.remote([left])

    # Nothing can finish while the signal remains unsent.
    assert not ray.wait([left, right, chained, combo], timeout=1)[0]

    ray.cancel(left, force=use_force)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(left, timeout=10)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(chained, timeout=40)

    signaler.send.remote()
    # `combo` still fails: one of its arguments was cancelled.
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(combo)
def test_recursive_cancel(shutdown_only, use_force):
    """Cancelling a parent task also cancels the child task it spawned.

    `outer` launches `inner`; each loops forever on one CPU. The 4-CPU
    `many_resources` task can only be scheduled after the recursive cancel
    releases the CPUs held by outer/inner.
    """
    ray.init(num_cpus=4)

    @ray.remote(num_cpus=1)
    def inner():
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=1)
    def outer():
        x = [inner.remote()]
        print(x)
        while True:
            time.sleep(0.1)

    @ray.remote(num_cpus=4)
    def many_resources():
        return 300

    outer_fut = outer.remote()
    many_fut = many_resources.remote()
    # The 4-CPU task cannot run while outer/inner hold 2 of the 4 CPUs.
    with pytest.raises(GetTimeoutError):
        ray.get(many_fut, timeout=1)
    # Fix: honor the test's `use_force` parameterization; previously the
    # cancel always used the default (non-force) mode, so the force path
    # was never exercised. valid_exceptions(use_force) accepts
    # TaskCancelledError in both modes, so this stays compatible.
    ray.cancel(outer_fut, force=use_force)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(outer_fut, timeout=10)

    # Recursive cancellation freed the CPUs, so this now completes.
    assert ray.get(many_fut, timeout=30)
def test_cancel_multiple_dependents(ray_start_regular, use_force):
    """Cancelling a head fails all dependents; dependents can also be
    cancelled individually while the head stays pending."""
    signaler = SignalActor.remote()

    @ray.remote
    def wait_for(t):
        return ray.get(t[0])

    head = wait_for.remote([signaler.wait.remote()])
    deps = []
    for _ in range(3):
        deps.append(wait_for.remote([head]))

    assert len(ray.wait([head], timeout=0.1)[0]) == 0
    ray.cancel(head, force=use_force)
    # Cancelling the head must propagate failure to every dependent.
    for d in deps:
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(d)

    head2 = wait_for.remote([signaler.wait.remote()])
    deps2 = []
    for _ in range(3):
        # Fix: the second batch must depend on the *new* head (head2), not
        # the already-cancelled `head` — otherwise ray.get(d) below raises
        # because of the upstream cancellation, and the per-dependent
        # cancels being tested here would be vacuous.
        deps2.append(wait_for.remote([head2]))

    # Cancel the dependents directly; head2 itself stays pending.
    for d in deps2:
        ray.cancel(d, force=use_force)
    for d in deps2:
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(d)

    signaler.send.remote()
    ray.get(head2)
def Terminate(self, req, context=None):
    """Serve a client Terminate RPC: cancel a task or kill an actor.

    Errors from the underlying ray call are reported back through the
    gRPC context rather than raised.
    """
    terminate_type = req.WhichOneof("terminate_type")
    if terminate_type == "task_object":
        try:
            # Look up the ref this client registered for the given task id.
            target_ref = self.object_refs[req.client_id][req.task_object.id]
            with disable_client_hook():
                ray.cancel(
                    target_ref,
                    force=req.task_object.force,
                    recursive=req.task_object.recursive,
                )
        except Exception as e:
            return_exception_in_context(e, context)
    elif terminate_type == "actor":
        try:
            target_actor = self.actor_refs[req.actor.id]
            with disable_client_hook():
                ray.kill(target_actor, no_restart=req.actor.no_restart)
        except Exception as e:
            return_exception_in_context(e, context)
    else:
        raise RuntimeError(
            "Client requested termination without providing a valid "
            "terminate_type")
    return ray_client_pb2.TerminateResponse(ok=True)
def test_fast(shutdown_only, use_force):
    """Stress-cancel quick tasks and signal-blocked tasks.

    Every ray.get on a (possibly cancelled) ref must either succeed or
    raise one of the exception types valid for this force mode.
    """
    ray.init(num_cpus=2)

    @ray.remote
    def fast(y):
        return y

    signaler = SignalActor.remote()
    ids = list()
    for _ in range(100):
        x = fast.remote("a")
        # Fix: pass the parameterized force mode; previously this cancel
        # ignored `use_force` and always used the default (non-force) mode.
        ray.cancel(x, force=use_force)
        ids.append(x)

    @ray.remote
    def wait_for(y):
        return y

    sig = signaler.wait.remote()
    for _ in range(5000):
        x = wait_for.remote(sig)
        ids.append(x)

    # Randomly cancel ~5% of the signal-blocked tasks.
    for idx in range(100, 5100):
        if random.random() > 0.95:
            # Fix: same `use_force` omission as above.
            ray.cancel(ids[idx], force=use_force)
    signaler.send.remote()

    for obj_id in ids:
        try:
            ray.get(obj_id)
        except Exception as e:
            assert isinstance(e, valid_exceptions(use_force))
def deploy_app(
    self,
    import_path: str,
    runtime_env: Dict,
    deployment_override_options: List[Dict],
) -> None:
    """Kicks off a task that deploys a Serve application.

    Cancels any previous in-progress task that is deploying a Serve
    application.

    Args:
        import_path: Serve deployment graph's import path
        runtime_env: runtime_env to run the deployment graph in
        deployment_override_options: All dictionaries should contain
            argument-value options that can be passed directly into a
            set_options() call. Overrides deployment options set in the
            graph itself.
    """
    # Only one deploy task may be in flight at a time.
    previous_request = self.config_deployment_request_ref
    if previous_request is not None:
        ray.cancel(previous_request)
        logger.info("Received new config deployment request. Cancelling "
                    "previous request.")

    self.config_deployment_request_ref = run_graph.options(
        runtime_env=runtime_env).remote(import_path, runtime_env,
                                        deployment_override_options)

    self.deployment_timestamp = time.time()
def cancel(self) -> None:
    """Cancel the running workflow."""
    frontier = self._state.running_frontier
    for fut, workflow_ref in frontier.items():
        fut.cancel()
        try:
            # Best effort: the underlying Ray task may already be finished.
            ray.cancel(workflow_ref.ref, force=True)
        except Exception:
            pass
def finish(self, result_handler=None):
    """Drain all pending results; on Ctrl-C, cancel whatever still waits.

    Args:
        result_handler: Optional callback invoked with each fetched result.
    """
    try:
        while self._waiting:
            ready = self._wait()
            if not result_handler:
                continue
            for filename_id in ready:
                result_handler(ray.get(filename_id))
    except KeyboardInterrupt:
        # Interrupted: cancel every task that has not completed yet.
        for pending_id in self._waiting:
            ray.cancel(pending_id)
def _stop_workers(self):
    """Force-cancel all outstanding worker tasks and reset worker state."""
    if not self._worker_tasks:
        return
    logging.info("Terminating workers")
    for index, task in self._worker_tasks.items():
        try:
            ray.cancel(task, force=True)
        except Exception:
            # Best effort; the task may already be finished or gone.
            pass
    self._worker_tasks = {}
    self._workers = {}
def stop(self, force=True):
    """Shut down the worker pool and clear all Redis queues.

    Args:
        force: Must be True; graceful (non-force) shutdown is not
            implemented.

    Raises:
        NotImplementedError: If force is False.
    """
    # Fix: validate before mutating any state. Previously the sentinel
    # pushes happened first, so a force=False call polluted the task
    # queue before raising.
    if not force:
        raise NotImplementedError

    # One shutdown sentinel per worker.
    for _ in range(len(self._workers)):
        self._r_client.lpush(TASK_QUEUE_KEY, serialize(None))

    for w_id in self._workers:
        ray.cancel(w_id, force=True)

    for key in [TASK_QUEUE_KEY, RESULTS_QUEUE_KEY, DATA_KEY]:
        self._r_client.delete(key)
async def force_worker_checkpoint(self):
    """Stop all workers with a non-force cancel so they can checkpoint.

    Holds the checkpoint lock while cancelling so the stop cannot
    interleave with a concurrent start/stop.
    """
    if not self._worker_tasks:
        return
    logging.info("Checkpoint needed: stopping workers")
    async with self._checkpoint_lock:
        if self._running:
            for index, task in self._worker_tasks.items():
                # Non-force cancel raises inside the worker, letting it
                # unwind and checkpoint. Fix: the original
                # `except Exception as e: raise e` was a no-op wrapper
                # that only degraded the traceback; let errors propagate
                # naturally instead.
                ray.cancel(task, force=False)
            self._running = False
def stop_computation():
    """Stop the computation tasks running on the Ray nodes."""
    print('stop_computation()')
    global result_ids
    global is_canceled
    # Signal cooperative cancellation to any code polling this flag.
    is_canceled = True
    # Force-kill every outstanding task.
    for result_id in result_ids:
        ray.cancel(result_id, force=True)
    result_ids = []
    # Reset the flag so a new computation can start cleanly.
    is_canceled = False
def test_object_store_memory_reporting_task():
    """Object-store memory pinned by a task argument is freed on force-cancel."""
    @ray.remote
    def f(x):
        time.sleep(60)

    try:
        ray.init(num_cpus=1, object_store_memory=500 * MB)
        wait_for_condition(lambda: object_store_memory(500 * MB))
        # The 150 MiB argument stays pinned while the task sleeps.
        pinned = f.remote(np.zeros(150 * 1024 * 1024, dtype=np.uint8))
        wait_for_condition(lambda: object_store_memory(350 * MB))
        # Force-cancelling the task should release the pinned argument.
        ray.cancel(pinned, force=True)
        wait_for_condition(lambda: object_store_memory(500 * MB))
    finally:
        ray.shutdown()
def deploy_app(
    self, config: ServeApplicationSchema, update_time: bool = True
) -> None:
    """Kicks off a task that deploys a Serve application.

    Cancels any previous in-progress task that is deploying a Serve
    application.

    Args:
        config: Contains the following:
            import_path: Serve deployment graph's import path.
            runtime_env: runtime_env to run the deployment graph in.
            deployment_override_options: Dictionaries that contain
                argument-value options that can be passed directly into a
                set_options() call. Overrides deployment options set in
                the graph's code itself.
        update_time: Whether to update the deployment_timestamp.
    """
    if update_time:
        self.deployment_timestamp = time.time()

    # Checkpoint the config so it survives controller restarts.
    config_dict = config.dict(exclude_unset=True)
    self.kv_store.put(
        CONFIG_CHECKPOINT_KEY,
        pickle.dumps((self.deployment_timestamp, config_dict)),
    )

    # Only one deploy task may run at a time; cancel any in-flight one.
    if self.config_deployment_request_ref is not None:
        ray.cancel(self.config_deployment_request_ref)
        logger.info(
            "Received new config deployment request. Cancelling "
            "previous request."
        )

    deployment_override_options = config.dict(
        by_alias=True, exclude_unset=True
    ).get("deployments", [])

    self.config_deployment_request_ref = run_graph.options(
        runtime_env=config.runtime_env
    ).remote(config.import_path, config.runtime_env, deployment_override_options)
def Terminate(self, request, context=None):
    """Serve a Terminate RPC by cancelling a task or killing an actor.

    Handles are transferred as cloudpickled refs; failures are reported
    back through the gRPC context instead of being raised.
    """
    kind = request.WhichOneof("terminate_type")
    if kind == "task_object":
        try:
            target = cloudpickle.loads(request.task_object.handle)
            ray.cancel(
                target,
                force=request.task_object.force,
                recursive=request.task_object.recursive)
        except Exception as e:
            return_exception_in_context(e, context)
    elif kind == "actor":
        try:
            target = cloudpickle.loads(request.actor.handle)
            ray.kill(target, no_restart=request.actor.no_restart)
        except Exception as e:
            return_exception_in_context(e, context)
    else:
        raise RuntimeError(
            "Client requested termination without providing a valid "
            "terminate_type")
    return ray_client_pb2.TerminateResponse(ok=True)
def test_stress(shutdown_only, use_force):
    """Randomly cancel a mix of infinitely-sleeping and no-op tasks."""
    ray.init(num_cpus=1)

    @ray.remote
    def infinite_sleep(y):
        if y:
            while True:
                time.sleep(1 / 10)

    first = infinite_sleep.remote(True)
    sleep_or_no = [random.randint(0, 1) for _ in range(100)]
    tasks = [infinite_sleep.remote(i) for i in sleep_or_no]

    cancelled = set()
    # First pass: cancel roughly half the tasks at random.
    for task in tasks:
        if random.random() > 0.5:
            ray.cancel(task, force=use_force)
            cancelled.add(task)
    ray.cancel(first, force=use_force)
    cancelled.add(first)

    for ref in cancelled:
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(ref, timeout=120)

    # Second pass: any sleeper not yet cancelled would hang forever, so
    # cancel it now; non-sleepers must complete normally.
    for idx, task in enumerate(tasks):
        if sleep_or_no[idx]:
            ray.cancel(task, force=use_force)
            cancelled.add(task)
        if task in cancelled:
            with pytest.raises(valid_exceptions(use_force)):
                ray.get(task, timeout=120)
        else:
            ray.get(task, timeout=120)
def generate_networks(cfg, overwrite=False):
    """Generate one network per configured random seed via Ray tasks.

    Args:
        cfg: Configuration object providing the seed range under
            "meta/random_seeds/start" and "meta/random_seeds/end".
        overwrite: Whether existing outputs may be overwritten.
    """
    seed_start = cfg.get_config("meta/random_seeds/start")
    seed_end = cfg.get_config("meta/random_seeds/end")
    seeds = range(seed_start, seed_end + 1)
    initialise_ray(cfg)
    # Fix: bind `futures` before the try so the except handlers never hit
    # a NameError if task submission itself raises.
    futures = []
    try:
        futures = [
            generate_one_network.remote(cfg, seed, overwrite)
            for seed in seeds
        ]
        wait_for_completion(futures, len(seeds))
    except KeyboardInterrupt:
        for process in futures:
            ray.cancel(process)
        # Fix: os._exit requires an integer exit code; the original passed
        # the KeyboardInterrupt object itself, which raises TypeError.
        # 130 is the conventional exit status for SIGINT.
        os._exit(130)
    except Exception as e:
        for process in futures:
            ray.cancel(process)
        print(e)
def test_fast(shutdown_only, use_force):
    """Cancel quick tasks and a large batch of signal-blocked tasks."""
    ray.init(num_cpus=2)

    @ray.remote
    def fast(y):
        return y

    signaler = SignalActor.remote()
    refs = []
    for _ in range(100):
        ref = fast.remote("a")
        # NOTE If a non-force Cancellation is attempted in the time
        # between a worker receiving a task and the worker executing
        # that task (specifically the python execution), Cancellation
        # can fail.
        time.sleep(0.1)
        ray.cancel(ref, force=use_force)
        refs.append(ref)

    @ray.remote
    def wait_for(y):
        return y

    sig = signaler.wait.remote()
    for _ in range(5000):
        refs.append(wait_for.remote(sig))

    # Randomly cancel ~5% of the blocked tasks.
    for idx in range(100, 5100):
        if random.random() > 0.95:
            ray.cancel(refs[idx], force=use_force)
    signaler.send.remote()

    for i, obj_ref in enumerate(refs):
        try:
            ray.get(obj_ref, timeout=120)
        except Exception as e:
            assert isinstance(
                e, valid_exceptions(use_force)), f"Failure on iteration: {i}"
def test_cancel_chain(ray_start_regular, use_force):
    """Cancelling within a dependency chain only fails downstream tasks."""
    signaler = SignalActor.remote()

    @ray.remote
    def wait_for(t):
        return ray.get(t[0])

    # Four-task chain rooted at the signal actor.
    obj1 = wait_for.remote([signaler.wait.remote()])
    obj2 = wait_for.remote([obj1])
    obj3 = wait_for.remote([obj2])
    obj4 = wait_for.remote([obj3])

    assert not ray.wait([obj1], timeout=0.1)[0]
    # Cancelling the root fails the entire chain.
    ray.cancel(obj1, force=use_force)
    for ob in (obj1, obj2, obj3, obj4):
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(ob)

    signaler2 = SignalActor.remote()
    obj1 = wait_for.remote([signaler2.wait.remote()])
    obj2 = wait_for.remote([obj1])
    obj3 = wait_for.remote([obj2])
    obj4 = wait_for.remote([obj3])

    assert not ray.wait([obj3], timeout=0.1)[0]
    # Cancelling mid-chain fails only obj3 and its dependent obj4.
    ray.cancel(obj3, force=use_force)
    for ob in (obj3, obj4):
        with pytest.raises(valid_exceptions(use_force)):
            ray.get(ob)

    # Upstream tasks are untouched: still pending until the signal is sent.
    with pytest.raises(GetTimeoutError):
        ray.get(obj1, timeout=0.1)
    with pytest.raises(GetTimeoutError):
        ray.get(obj2, timeout=0.1)

    signaler2.send.remote()
    ray.get(obj1)
def apply(self, fn: Any, remote_args: dict, blocks: BlockList) -> BlockList:
    """Map ``fn`` over every block using one Ray task per block.

    Args:
        fn: Transformation applied to each block.
        remote_args: Options forwarded to the remote map tasks.
        blocks: Input blocks with their metadata.

    Returns:
        A new BlockList of transformed blocks and their metadata.
    """
    # Handle empty datasets.
    if blocks.initial_num_blocks() == 0:
        return blocks

    blocks = list(blocks.iter_blocks_with_metadata())
    map_bar = ProgressBar("Map Progress", total=len(blocks))

    map_block = cached_remote_fn(_map_block)
    refs = [
        map_block.options(**remote_args).remote(b, fn, m.input_files)
        for b, m in blocks
    ]

    try:
        results = map_bar.fetch_until_complete(refs)
    except (ray.exceptions.RayTaskError, KeyboardInterrupt) as e:
        # One or more mapper tasks failed, or we received a SIGINT signal
        # while waiting; either way, we cancel all map tasks.
        for ref in refs:
            ray.cancel(ref)
        # Wait until all tasks have failed or been cancelled.
        for ref in refs:
            try:
                ray.get(ref)
            except (ray.exceptions.RayTaskError,
                    ray.exceptions.TaskCancelledError):
                pass
        # Reraise the original task failure exception.
        raise e from None

    new_blocks, new_metadata = [], []
    # Each task may return multiple (block, metadata) pairs; flatten them.
    for result in results:
        for block, metadata in result:
            new_blocks.append(block)
            new_metadata.append(metadata)
    return BlockList(list(new_blocks), list(new_metadata))
def test_remote_cancel(ray_start_regular, use_force):
    """A task launched *inside* another task can be cancelled from outside."""
    signaler = SignalActor.remote()

    @ray.remote
    def wait_for(y):
        return ray.get(y[0])

    @ray.remote
    def remote_wait(sg):
        return [wait_for.remote([sg[0]])]

    sig = signaler.wait.remote()

    # `outer` hands back the ref of a nested task blocked on the signal.
    outer = remote_wait.remote([sig])
    inner = ray.get(outer)[0]

    # The nested task cannot finish while the signal is unsent.
    with pytest.raises(GetTimeoutError):
        ray.get(inner, timeout=1)

    ray.cancel(inner, force=use_force)
    with pytest.raises(valid_exceptions(use_force)):
        ray.get(inner, timeout=10)
def test_worker(ray_fix):
    """End-to-end AdaptDL worker checkpoint/restart test.

    Runs a worker task, cancels it non-forcefully so it can checkpoint,
    then restarts a second worker from that checkpoint and verifies the
    checkpoint file contents.

    NOTE(review): synchronization relies on fixed 10-second sleeps, so
    this test may be flaky on slow machines — confirm timing assumptions.
    """
    controller = MockedController.options(name="AdaptDLController").remote()
    rank = 0
    replicas = 2
    restarts = 3
    checkpoint = None
    offset = 50
    path = "ray/adaptdl_ray/aws/_example_worker.py"
    argv = ["--arg1", "value", "--arg2", "value"]
    worker_task = run_adaptdl.remote("test_key", "test_uid", rank, replicas,
                                     restarts, checkpoint, offset, path, argv)
    # can't cancel with force=True (the worker must get a chance to
    # checkpoint, which a force-kill would prevent)
    time.sleep(10)
    ray.cancel(worker_task, force=False)
    print("canceling")
    time.sleep(10)
    checkpoint = ray.get(controller.get_checkpoint.remote())
    print(checkpoint)
    assert ('file.txt' in checkpoint)
    # Second non-force cancel as a safeguard in case the task still runs.
    ray.cancel(worker_task, force=False)
    rank = 1
    replicas = 2
    restarts = 4
    offset = 50
    # Restart a new worker from the captured checkpoint.
    worker_task = run_adaptdl.remote("test_key_2", "test_uid_2", rank,
                                     replicas, restarts, checkpoint, offset,
                                     path, argv)
    time.sleep(10)
    assert (os.path.exists("/tmp/checkpoint-test_uid_2-1/file.txt"))
    with open("/tmp/checkpoint-test_uid_2-1/file.txt", "rb") as f:
        result = int(f.read())
    assert (result == 5)
def test_pipeline_splitting_has_no_spilling(shutdown_only):
    """Splitting a windowed pipeline must not spill objects to disk."""
    # The object store is about 800MiB.
    ctx = ray.init(num_cpus=1, object_store_memory=800e6)
    # The size of dataset is 50000*(80*80*4)*8B, about 10GiB, 50MiB/block.
    ds = ray.data.range_tensor(50000, shape=(80, 80, 4), parallelism=200)

    # 2 blocks/window.
    p = ds.window(bytes_per_window=100 * 1024 * 1024).repeat()
    p1, p2 = p.split(2)

    @ray.remote
    def consume(p):
        for batch in p.iter_batches():
            pass

    consumers = [consume.remote(p1), consume.remote(p2)]
    try:
        # Run it for 20 seconds.
        ray.get(consumers, timeout=20)
    except Exception:
        # Timeout (or failure): tear the consumers down before checking.
        for task in consumers:
            ray.cancel(task, force=True)
    meminfo = memory_summary(ctx.address_info["address"], stats_only=True)
    assert "Spilled" not in meminfo, meminfo
def apply(self, fn: Any, remote_args: dict,
          blocks: BlockList[Any]) -> BlockList[Any]:
    """Map ``fn`` over every block; each task returns (block, metadata).

    Args:
        fn: Transformation applied to each block.
        remote_args: Options forwarded to the remote map tasks.
        blocks: Input blocks with their metadata.

    Returns:
        A new BlockList of transformed blocks and their metadata.
    """
    # Handle empty datasets.
    if len(blocks) == 0:
        return blocks

    map_bar = ProgressBar("Map Progress", total=len(blocks))

    kwargs = remote_args.copy()
    # Each task returns two objects: (new_block, new_metadata).
    kwargs["num_returns"] = 2

    map_block = cached_remote_fn(_map_block)
    refs = [
        map_block.options(**kwargs).remote(b, m, fn)
        for b, m in zip(blocks, blocks.get_metadata())
    ]
    # Unzip into parallel tuples of block refs and metadata refs.
    new_blocks, new_metadata = zip(*refs)

    new_metadata = list(new_metadata)
    try:
        # Waiting on the metadata refs implicitly waits on the tasks.
        new_metadata = map_bar.fetch_until_complete(new_metadata)
    except (ray.exceptions.RayTaskError, KeyboardInterrupt) as e:
        # One or more mapper tasks failed, or we received a SIGINT signal
        # while waiting; either way, we cancel all map tasks.
        for ref in new_metadata:
            ray.cancel(ref)
        # Wait until all tasks have failed or been cancelled.
        for ref in new_metadata:
            try:
                ray.get(ref)
            except (ray.exceptions.RayTaskError,
                    ray.exceptions.TaskCancelledError):
                pass
        # Reraise the original task failure exception.
        raise e from None

    return BlockList(list(new_blocks), list(new_metadata))
def test_errors_before_initializing_ray():
    """Every public API call raises RayConnectionError before ray.init()."""
    @ray.remote
    def f():
        pass

    @ray.remote
    class Foo:
        pass

    api_methods = [
        f.remote,
        Foo.remote,
        ray.actors,
        lambda: ray.cancel(None),  # Not valid API usage.
        lambda: ray.get([]),
        lambda: ray.get_actor("name"),
        ray.get_gpu_ids,
        ray.get_resource_ids,
        ray.get_webui_url,
        ray.jobs,
        lambda: ray.kill(None),  # Not valid API usage.
        ray.nodes,
        ray.objects,
        lambda: ray.put(1),
        lambda: ray.wait([]),
    ]

    def assert_all_raise():
        for api_method in api_methods:
            print(api_method)
            with pytest.raises(ray.exceptions.RayConnectionError,
                               match="Ray has not been started yet."):
                api_method()

    assert_all_raise()
    # Make sure that the exceptions are still raised after Ray has been
    # started and shutdown.
    ray.init(num_cpus=0)
    ray.shutdown()
    assert_all_raise()
timeit.timeit(lambda: in_order(), number=1)


# In[ ]:


#tag::handle_bad_futures[]
futures = list(map(lambda x: remote_task.remote(x),
                   [1, threading.TIMEOUT_MAX]))
# While we still have pending futures
while len(futures) > 0:
    # In practice 10 seconds is too short for most cases.
    ready_futures, rest_futures = ray.wait(futures, timeout=10, num_returns=1)
    # If we get back anything less than num_returns
    if len(ready_futures) < 1:
        print(f"Timed out on {rest_futures}")
        # You don't _have_ to cancel, but if your task is using a lot of
        # resources you should. Fix: ray.cancel accepts a single ObjectRef,
        # so cancel each pending future individually — unpacking the whole
        # list into one call raises TypeError when more than one future
        # remains.
        for pending in rest_futures:
            ray.cancel(pending)
        # You should break since you exceeded your timeout
        break
    for id in ready_futures:
        print(f'completed value {id}, result {ray.get(id)}')
    futures = rest_futures
#end::handle_bad_futures[]


# In[ ]:


remote_task.remote(1)


# In[ ]:


#tag::ray_remote_seq[]
def cancel(self, obj, *, force=False, recursive=True):
    """Cancel a Ray task, delegating directly to ``ray.cancel``.

    Args:
        obj: ObjectRef of the task to cancel.
        force: Whether to force-kill a running task.
        recursive: Whether to also cancel tasks the target spawned.
    """
    result = ray.cancel(obj, force=force, recursive=recursive)
    return result