def _split(self, n: int, splitter: Callable[[Dataset], "DatasetPipeline[T]"]):
    """Split this pipeline into ``n`` disjoint pipeline shards.

    A coordinator actor consumes the underlying pipeline and hands each
    shard its datasets on demand via ``next_dataset_if_ready``. All the
    returned shards must be read concurrently, otherwise readers block
    waiting for the slowest shard to catch up.

    Args:
        n: Number of shards to produce.
        splitter: Callable applied (by the coordinator) to each dataset
            to produce the per-shard splits.

    Returns:
        A list of ``n`` ``DatasetPipeline`` shards.

    Raises:
        RuntimeError: If this pipeline has already been read.
    """
    # Fail fast BEFORE spawning the coordinator actor: creating the actor
    # first and then raising would leak it when the pipeline was already
    # consumed.
    if self._executed[0]:
        raise RuntimeError("Pipeline cannot be read multiple times.")
    self._executed[0] = True

    resources = {}
    if not ray.util.client.ray.is_connected():
        # Pin the coordinator (and any child actors) to the local node to
        # avoid errors during node failures. If the local node dies, then
        # the driver will fate-share with the coordinator anyway.
        resources["node:{}".format(
            ray.util.get_node_ip_address())] = 0.0001

    coordinator = PipelineSplitExecutorCoordinator.options(
        resources=resources,
        placement_group=None,
    ).remote(self, n, splitter, DatasetContext.get_current())

    class SplitIterator:
        """Pulls the datasets assigned to a single shard from the
        coordinator, warning (with exponential backoff of the warning
        threshold) when this reader stalls behind the other shards."""

        def __init__(self, split_index, coordinator):
            self.split_index = split_index
            self.coordinator = coordinator
            # Warn after this many poll attempts; doubled after each warn.
            self.warn_threshold = 100
            # Delay between polls of the coordinator, in seconds.
            self.wait_delay_s = 0.1

        def __iter__(self):
            return self

        def __next__(self):
            ds = None
            tries = 0
            while ds is None:
                ds = ray.get(
                    self.coordinator.next_dataset_if_ready.remote(
                        self.split_index))
                # Wait for other shards to catch up reading.
                if not ds:
                    time.sleep(self.wait_delay_s)
                    tries += 1
                    if tries > self.warn_threshold:
                        print("Warning: reader on shard {} of the pipeline "
                              "has been blocked more than {}s waiting for "
                              "other readers to catch up. All pipeline shards "
                              "must be read from concurrently.".format(
                                  self.split_index,
                                  self.wait_delay_s * self.warn_threshold,
                              ))
                        self.warn_threshold *= 2
            # Return a thunk; the pipeline consumer materializes it lazily.
            return lambda: ds

    return [
        # Disable progress bars for the split readers since they would
        # overwhelm the console.
        DatasetPipeline(
            SplitIterator(idx, coordinator),
            length=self._length,
            progress_bars=False,
        )
        for idx in range(n)
    ]
def _split(self, n: int, splitter: Callable[[Dataset], "DatasetPipeline[T]"]):
    """Split this pipeline into ``n`` disjoint pipeline shards.

    A coordinator actor consumes the underlying pipeline and hands each
    shard its datasets on demand via ``next_dataset_if_ready``. All the
    returned shards must be read concurrently, otherwise readers block
    waiting for the slowest shard to catch up.

    Args:
        n: Number of shards to produce.
        splitter: Callable applied (by the coordinator) to each dataset
            to produce the per-shard splits.

    Returns:
        A list of ``n`` ``DatasetPipeline`` shards.

    Raises:
        RuntimeError: If this pipeline has already been read.
    """
    # Fail fast BEFORE spawning the coordinator actor: creating the actor
    # first and then raising would leak it when the pipeline was already
    # consumed.
    if self._executed[0]:
        raise RuntimeError("Pipeline cannot be read multiple times.")
    self._executed[0] = True

    coordinator = PipelineSplitExecutorCoordinator.remote(
        self, n, splitter, DatasetContext.get_current())

    class SplitIterator:
        """Pulls the datasets assigned to a single shard from the
        coordinator, warning (with exponential backoff of the warning
        threshold) when this reader stalls behind the other shards."""

        def __init__(self, split_index, coordinator):
            self.split_index = split_index
            self.coordinator = coordinator
            # Warn after this many poll attempts; doubled after each warn.
            self.warn_threshold = 100
            # Delay between polls of the coordinator, in seconds.
            self.wait_delay_s = 0.1

        def __iter__(self):
            return self

        def __next__(self):
            ds = None
            tries = 0
            while ds is None:
                ds = ray.get(
                    self.coordinator.next_dataset_if_ready.remote(
                        self.split_index))
                # Wait for other shards to catch up reading.
                if not ds:
                    time.sleep(self.wait_delay_s)
                    tries += 1
                    if tries > self.warn_threshold:
                        print("Warning: reader on shard {} of the pipeline "
                              "has been blocked more than {}s waiting for "
                              "other readers to catch up. All pipeline shards "
                              "must be read from concurrently.".format(
                                  self.split_index,
                                  self.wait_delay_s * self.warn_threshold,
                              ))
                        self.warn_threshold *= 2
            # Return a thunk; the pipeline consumer materializes it lazily.
            return lambda: ds

    return [
        # Disable progress bars for the split readers since they would
        # overwhelm the console.
        DatasetPipeline(
            SplitIterator(idx, coordinator),
            length=self._length,
            progress_bars=False,
        )
        for idx in range(n)
    ]
def _split(self, n: int, splitter: Callable[[Dataset], "DatasetPipeline[T]"]):
    """Fan this pipeline out into ``n`` shard pipelines.

    A coordinator actor drives the source pipeline; each shard polls it
    for its next dataset. Every shard must be consumed concurrently or
    the faster readers will stall waiting on the slower ones.
    """
    coordinator = PipelineSplitExecutorCoordinator.remote(
        self, n, splitter)

    class SplitIterator:
        """Iterator yielding the datasets assigned to one shard."""

        def __init__(self, split_index, coordinator):
            self.split_index = split_index
            self.coordinator = coordinator
            # Poll count at which to emit a stall warning; doubles after
            # each warning so output stays bounded.
            self.warn_threshold = 100
            # Seconds to sleep between polls of the coordinator.
            self.wait_delay_s = 0.1

        def __iter__(self):
            return self

        def __next__(self):
            stalls = 0
            ds = None
            while ds is None:
                ds = ray.get(
                    self.coordinator.next_dataset_if_ready.remote(
                        self.split_index))
                if ds:
                    continue
                # Nothing ready yet: other shards are behind, so back
                # off briefly before polling again.
                time.sleep(self.wait_delay_s)
                stalls += 1
                if stalls > self.warn_threshold:
                    print("Warning: shard {} of the pipeline has been "
                          "stalled more than {}s waiting for other shards "
                          "to catch up.".format(
                              self.split_index,
                              self.wait_delay_s * self.warn_threshold))
                    self.warn_threshold *= 2
            # Hand back a thunk; the consumer materializes it lazily.
            return lambda: ds

    shards = []
    for shard_id in range(n):
        # Progress bars are disabled for the split readers since they
        # would overwhelm the console.
        shards.append(
            DatasetPipeline(
                SplitIterator(shard_id, coordinator),
                progress_bars=False))
    return shards