Example #1
    def test_deprecated_local_processes(self):
        with pytest.warns(UserWarning, match="local_processes"):
            executor = DaskExecutor(
                cluster_class="distributed.LocalCluster",
                client_kwargs={"set_as_default": True},
                local_processes=True,
            )
        assert executor.cluster_class == distributed.LocalCluster
        assert executor.cluster_kwargs == {
            "processes": True,
            "silence_logs": logging.CRITICAL,
        }
        assert executor.client_kwargs == {"set_as_default": True}

        # When not using a LocalCluster, `local_processes` warns, but isn't
        # added to the kwargs
        with pytest.warns(UserWarning, match="local_processes"):

            class TestCluster(object):
                pass

            executor = DaskExecutor(cluster_class=TestCluster, local_processes=True)

        assert executor.cluster_class == TestCluster
        assert executor.cluster_kwargs == {}
        assert executor.client_kwargs == {"set_as_default": False}
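For reference, the test above asserts that the deprecated `local_processes=True` is folded into `cluster_kwargs` as `processes=True`. A minimal sketch of the equivalent non-deprecated construction (defaults such as `silence_logs` are filled in by the executor itself):

executor = DaskExecutor(
    cluster_class="distributed.LocalCluster",
    cluster_kwargs={"processes": True},
    client_kwargs={"set_as_default": True},
)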
Example #2
    def test_local_cluster_adapt(self):
        adapt_kwargs = {"minimum": 1, "maximum": 1}
        called_with = None

        class MyCluster(distributed.LocalCluster):
            def adapt(self, **kwargs):
                nonlocal called_with
                called_with = kwargs
                super().adapt(**kwargs)

        executor = DaskExecutor(
            cluster_class=MyCluster,
            cluster_kwargs={"processes": False, "n_workers": 0},
            adapt_kwargs=adapt_kwargs,
        )

        assert executor.adapt_kwargs == adapt_kwargs

        with executor.start():
            res = executor.wait(executor.submit(lambda x: x + 1, 1))
            assert res == 2

        assert called_with == adapt_kwargs
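The subclass here exists only to record the `adapt()` call; adaptive scaling itself needs no subclass. A minimal sketch of the plain usage implied by the test:

executor = DaskExecutor(
    cluster_kwargs={"processes": False, "n_workers": 0},
    adapt_kwargs={"minimum": 1, "maximum": 2},  # forwarded to cluster.adapt()
)
with executor.start():
    assert executor.wait(executor.submit(lambda x: x + 1, 1)) == 2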
Example #3
    def test_init_kwargs_are_passed_to_init(self, monkeypatch):
        client = MagicMock()
        monkeypatch.setattr(prefect.engine.executors.dask, "Client", client)
        executor = DaskExecutor(test_kwarg="test_value")
        with executor.start():
            pass
        assert client.called
        assert client.call_args[-1]["test_kwarg"] == "test_value"
Example #4
    def test_task_names_are_passed_to_submit(self, monkeypatch):
        client = MagicMock()
        monkeypatch.setattr(distributed, "Client", client)
        executor = DaskExecutor()
        with executor.start():
            with prefect.context(task_full_name="FISH!"):
                executor.submit(lambda: None)
        kwargs = client.return_value.__enter__.return_value.submit.call_args[1]
        assert kwargs["key"].startswith("FISH!")
Example #5
    def test_task_names_are_passed_to_map(self, monkeypatch):
        client = MagicMock()
        monkeypatch.setattr(prefect.engine.executors.dask, "Client", client)
        executor = DaskExecutor()
        with executor.start():
            with prefect.context(task_full_name="FISH![0]"):
                executor.map(lambda: None, [1, 2])
        kwargs = client.return_value.__enter__.return_value.map.call_args[1]
        assert kwargs["key"].startswith("FISH![0]")
Example #6
    def test_context_tags_are_passed_to_submit(self, monkeypatch):
        client = MagicMock()
        monkeypatch.setattr(distributed, "Client", client)
        executor = DaskExecutor()
        with executor.start():
            with prefect.context(task_tags=["dask-resource:GPU=1"]):
                executor.submit(lambda: None)
        kwargs = client.return_value.__enter__.return_value.submit.call_args[1]
        assert kwargs["resources"] == {"GPU": 1.0}
Example #7
    def test_context_tags_are_passed_to_map(self, monkeypatch):
        client = MagicMock()
        monkeypatch.setattr(prefect.engine.executors.dask, "Client", client)
        executor = DaskExecutor()
        with executor.start():
            with prefect.context(task_tags=["dask-resource:GPU=1"]):
                executor.map(lambda: None, [1, 2])
        kwargs = client.return_value.__enter__.return_value.map.call_args[1]
        assert kwargs["resources"] == {"GPU": 1.0}
Example #8
    def test_connect_to_running_cluster(self):
        with distributed.Client(processes=False, set_as_default=False) as client:
            executor = DaskExecutor(address=client.scheduler.address)
            assert executor.address == client.scheduler.address
            assert executor.cluster_class is None
            assert executor.cluster_kwargs is None
            assert executor.client_kwargs == {"set_as_default": False}

            with executor.start():
                res = executor.wait(executor.submit(lambda x: x + 1, 1))
                assert res == 2
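Connecting to an existing scheduler needs only its address; the executor then leaves the cluster's lifetime to the caller. A sketch along the lines of the test above, using a caller-managed `LocalCluster`:

import distributed

from prefect.engine.executors import DaskExecutor

cluster = distributed.LocalCluster(processes=False)  # lifetime managed by the caller
executor = DaskExecutor(address=cluster.scheduler_address)
with executor.start():
    print(executor.wait(executor.submit(sum, [1, 2, 3])))  # 6
cluster.close()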
Example #9
    def test_start_local_cluster(self):
        executor = DaskExecutor(cluster_kwargs={"processes": False})
        assert executor.cluster_class == distributed.LocalCluster
        assert executor.cluster_kwargs == {
            "processes": False,
            "silence_logs": logging.CRITICAL,
        }

        with executor.start():
            res = executor.wait(executor.submit(lambda x: x + 1, 1))
            assert res == 2
Example #10
    def test_prep_dask_kwargs(self):
        executor = DaskExecutor()
        kwargs = executor._prep_dask_kwargs(
            dict(task_name="FISH!", task_tags=["dask-resource:GPU=1"])
        )
        assert kwargs["key"].startswith("FISH!-")
        assert kwargs["resources"] == {"GPU": 1.0}

        kwargs = executor._prep_dask_kwargs(
            dict(task_name="FISH!", task_tags=["dask-resource:GPU=1"], task_index=1)
        )
        assert kwargs["key"].startswith("FISH!-1-")
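These tests rely on Prefect's `dask-resource:<NAME>=<VALUE>` tag convention, which is translated into Dask's `resources` submit kwarg. A minimal sketch of tagging a task this way (the task body is hypothetical):

from prefect import task

@task(tags=["dask-resource:GPU=1"])
def train_model():
    # Scheduled only on Dask workers started with --resources "GPU=1".
    ...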
Example #11
def mthread():
    "Multi-threaded executor"
    with Client(processes=False,
                scheduler_port=0,
                dashboard_address=":0",
                n_workers=2) as client:
        yield DaskExecutor(client.scheduler.address)
Example #12
def run(src_dir, dst_dir, debug=False):
    src_dir = Parameter("src_dir", src_dir)

    # create destination
    create_dir(dst_dir)
    dst_dir = Parameter("dst_dir", dst_dir)

    with Flow("classify_pipeline") as flow:
        # load data
        h5_paths = find_src_files(src_dir, "h5")
        info = preload_array_info(h5_paths)
        prob_map = read_prob_map.map(h5_paths, unmapped(info))

        # classify
        label = classify.map(prob_map)

        # save
        tiff_paths = build_path.map(unmapped(dst_dir), h5_paths,
                                    unmapped("tif"))
        write_tiff.map(tiff_paths, label)

    if debug:
        flow.visualize()
    else:
        client = get_client()
        executor = DaskExecutor(address=client.scheduler.address)

        flow.run(executor=executor)
Example #13
def main():
    with Flow("Check listings",
              environment=LocalEnvironment(executor=DaskExecutor())) as flow:
        city = Parameter("city")

        ## Extract
        # get the current listings
        listings = get_current_listings(city)
        # fetch the pages
        pages = fetch_pages(listings, city)

        ## Transform
        # parse the listings
        data = parse_listings(pages)

        # Load
        save_listings(data)

    # flow.storage = Docker(registry_url="bramevert/craig")

    # flow.run_config = DockerRun(
    #     env={"GOOGLE_APPLICATION_CREDENTIALS": "/home/app/craiglist-crawler-a7aff758fc9d.json"},
    #     image="craig:latest",
    #     labels=["bram-desktop"],
    # )
    # flow.register(project_name="Craiglist Crawler")

    # flow.run_agent()
    flow.run(city="vancouver")
Example #14
    def run_flow(self) -> None:
        """
        Run the flow from specified flow_file_path location using a Dask executor
        """
        try:
            from prefect.engine import get_default_flow_runner_class
            from prefect.engine.executors import DaskExecutor
            from dask_kubernetes import KubeCluster

            with open(path.join(path.dirname(__file__),
                                "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(
                    yaml_obj=worker_pod)

                cluster = KubeCluster.from_dict(
                    worker_pod, namespace=prefect.context.get("namespace"))
                cluster.adapt(minimum=1, maximum=1)

                # Load serialized flow from file and run it with a DaskExecutor
                with open(
                        prefect.context.get("flow_file_path",
                                            "/root/.prefect/flow_env.prefect"),
                        "rb",
                ) as f:
                    flow = cloudpickle.load(f)

                    executor = DaskExecutor(address=cluster.scheduler_address)
                    runner_cls = get_default_flow_runner_class()
                    runner_cls(flow=flow).run(executor=executor)
                    sys.exit(0)  # attempt to force resource cleanup
        except Exception as exc:
            self.logger.error(
                "Unexpected error raised during flow run: {}".format(exc))
            raise exc
Example #15
    def test_executor_logs_worker_events(self, caplog):
        caplog.set_level(logging.DEBUG, logger="prefect")
        with distributed.Client(
            n_workers=1, processes=False, set_as_default=False
        ) as client:
            executor = DaskExecutor(address=client.scheduler.address)
            with executor.start():
                client.cluster.scale(4)
                while len(client.scheduler_info()["workers"]) < 4:
                    time.sleep(0.1)
                client.cluster.scale(1)
                while len(client.scheduler_info()["workers"]) > 1:
                    time.sleep(0.1)

        assert any("Worker %s added" == rec.msg for rec in caplog.records)
        assert any("Worker %s removed" == rec.msg for rec in caplog.records)
Example #16
    def test_cluster_class_and_kwargs(self):
        pytest.importorskip("distributed.deploy.spec")
        executor = DaskExecutor(
            cluster_class="distributed.deploy.spec.SpecCluster",
            cluster_kwargs={"some_kwarg": "some_val"},
            client_kwargs={"set_as_default": True},
        )
        assert executor.cluster_class == distributed.deploy.spec.SpecCluster
        assert executor.cluster_kwargs == {"some_kwarg": "some_val"}
        assert executor.client_kwargs == {"set_as_default": True}

        class TestCluster(object):
            pass

        executor = DaskExecutor(cluster_class=TestCluster)
        assert executor.cluster_class == TestCluster
Example #17
def run(src_dir, dst_dir, debug=False):
    src_dir = Parameter("src_dir", src_dir)

    # create destination
    create_dir(dst_dir)
    dst_dir = Parameter("dst_dir", dst_dir)

    with Flow("convert_pipeline") as flow:
        # load data
        tiff_paths = find_src_files(src_dir, "tif")
        info = preload_array_info(tiff_paths)
        raw_data = read_tiff.map(tiff_paths, unmapped(info))

        # save as zarr for faster access
        zarr_paths = build_path.map(unmapped(dst_dir), tiff_paths,
                                    unmapped("zarr"))
        zarr_paths = write_zarr.map(zarr_paths, raw_data, unmapped("raw"))

        # convert
        h5_paths = build_path.map(unmapped(dst_dir), zarr_paths,
                                  unmapped("h5"))
        zarr_to_h5.map(zarr_paths, h5_paths)

    if debug:
        flow.visualize()
    else:
        client = get_client()
        executor = DaskExecutor(address=client.scheduler.address)

        flow.run(executor=executor)
Example #18
def run(src_dir, dst_dir, config_path: str, debug=False):
    src_dir = Parameter("src_dir", src_dir)

    # create destination
    create_dir(dst_dir)
    dst_dir = Parameter("dst_dir", dst_dir)

    # number of workers
    config_path = Parameter("config_path", config_path)

    with Flow("inference_pipeline") as flow:
        # list tiles
        tiff_paths = find_src_files(src_dir, "h5")
        parted_tiff_paths = partition_path_list(tiff_paths, 5)

        prob_paths = infer.map(parted_tiff_paths, unmapped(config_path),
                               unmapped(dst_dir))
        prob_paths = combine_path_list(prob_paths)

    if debug:
        flow.visualize(filename="flow_debug")
    else:
        client = get_client()
        executor = DaskExecutor(address=client.scheduler.address)

        flow.run(executor=executor)
Example #19
    def test_deprecated_client_kwargs(self):
        with pytest.warns(UserWarning, match="client_kwargs"):
            executor = DaskExecutor(
                cluster_class="distributed.LocalCluster", set_as_default=True,
            )
        assert executor.cluster_kwargs == {"silence_logs": logging.CRITICAL}
        assert executor.client_kwargs == {"set_as_default": True}
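As with `local_processes` in Example #1, the deprecated keyword is rewritten into the new kwargs dict. The non-deprecated spelling implied by the assertions:

executor = DaskExecutor(
    cluster_class="distributed.LocalCluster",
    client_kwargs={"set_as_default": True},
)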
Example #20
    def run_flow(self) -> None:
        """
        Run the flow from specified flow_file_path location using a Dask executor
        """
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        with open(path.join(path.dirname(__file__),
                            "worker_pod.yaml")) as pod_file:
            worker_pod = yaml.safe_load(pod_file)
            worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

            cluster = KubeCluster.from_dict(worker_pod)
            cluster.adapt(minimum=1, maximum=1)

            # Load serialized flow from file and run it with a DaskExecutor
            with open(
                    prefect.context.get("flow_file_path",
                                        "/root/.prefect/flow_env.prefect"),
                    "rb",
            ) as f:
                flow = cloudpickle.load(f)

                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)
Example #21
def mproc():
    "Multi-processing executor"
    with Client(processes=True,
                scheduler_port=0,
                dashboard_address=":0",
                n_workers=2) as client:
        yield DaskExecutor(client.scheduler.address)
Example #22
def mproc():
    "Multi-processing executor"
    with Client(processes=True) as client:
        yield DaskExecutor(client.scheduler.address, local_processes=True)
        try:
            client.shutdown()
        except Exception:
            pass
Example #23
def prepare_executor(executor_type, executor_address=None):
    """Instantiate a prefect executor"""
    if executor_type == "dask":
        if executor_address is not None:
            executor = DaskExecutor(executor_address)
        else:
            executor = DaskExecutor(local_processes=True)
    elif executor_type == "synchronous":
        executor = SynchronousExecutor()
    elif executor_type == "local":
        executor = LocalExecutor()
    else:
        # Should not happen if click parameters are done correctly, but
        # kept for completeness
        raise ValueError(f'Unknown executor type "{executor_type}".')

    return executor
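A few hypothetical invocations of this dispatcher (the scheduler address is a placeholder):

local_exec = prepare_executor("local")                        # LocalExecutor
sync_exec = prepare_executor("synchronous")                   # SynchronousExecutor
dask_exec = prepare_executor("dask", "tcp://scheduler:8786")  # remote Dask scheduler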
Example #24
    def test_temporary_cluster_forcefully_cancels_pending_tasks(self, tmpdir):
        filename = tmpdir.join("signal")

        def slow():
            time.sleep(10)
            with open(filename, "w") as f:
                f.write("Got here")

        executor = DaskExecutor()
        with executor.start():
            start = time.time()
            fut = executor.submit(slow)  # noqa
            time.sleep(0.1)
        stop = time.time()
        # Cluster shutdown before task could complete
        assert stop - start < 5
        assert not os.path.exists(filename)
Example #25
def mthread():
    "Multi-threaded executor"
    with Client(processes=False) as client:
        yield DaskExecutor(client.scheduler.address)
        try:
            client.shutdown()
        except Exception:
            pass
Example #26
def main(config=config):
    """Executes vektis.agb.flow in DaskExecutor.
    """
    executor = DaskExecutor(n_workers=8)
    flow.run(
        executor=executor,
        parameters={"gcp": config.gcp},
    )
Example #27
    def run(
        self, clean: bool = False, debug: bool = False, **kwargs,
    ):
        """
        Run a flow with your steps.

        Parameters
        ----------
        clean: bool
            Should the local staging directory be cleaned prior to this run.
            Default: False (Do not clean)
        debug: bool
            A debug flag for the developer to control how much data runs
            and how it is processed.
            Default: False (Do not debug)

        Notes
        -----
        Documentation on prefect:
        https://docs.prefect.io/core/

        Basic prefect example:
        https://docs.prefect.io/core/
        """
        # Initialize steps
        raw = steps.Raw()

        # Choose executor
        if debug:
            exe = LocalExecutor()
        else:
            # Set up connection to computation cluster
            cluster = LocalCluster()

            # Inform of Dask UI
            log.info(f"Cluster dashboard available at: {cluster.dashboard_link}")

            # Create dask executor
            exe = DaskExecutor(cluster.scheduler_address)

        # Configure your flow
        with Flow("{{ cookiecutter.project_slug }}") as flow:
            # If you want to clean the local staging directories pass clean
            # If you want to utilize some debugging functionality pass debug
            # If you don't utilize any of these, just pass the parameters you need.
            raw(
                clean=clean,
                debug=debug,
                **kwargs,  # Allows us to pass `--n {some integer}` or other params
            )

        # Run flow and get ending state
        state = flow.run(executor=exe)

        # Get and display any outputs you want to see on your local terminal
        log.info(raw.get_result(state, flow))
Example #28
def mthread():
    "Multi-threaded executor using dask distributed"
    with Client(
            processes=False,
            scheduler_port=0,
            dashboard_address=":0",
            n_workers=1,
            threads_per_worker=2,
    ) as client:
        yield DaskExecutor(client.scheduler.address)
Example #29
def execute(flow: Flow) -> State:
    """
    Returns:
        State: the final state of the league flow
    """
    with raise_on_exception():
        executor = DaskExecutor(address=os.getenv("WORKER_ADDRESS"))
        # Pass the executor explicitly; otherwise flow.run() ignores it.
        state = flow.run(executor=executor)

        return state
Example #30
def mproc():
    "Multi-processing executor using dask distributed"
    with Client(
            processes=True,
            scheduler_port=0,
            dashboard_address=":0",
            n_workers=2,
            threads_per_worker=1,
    ) as client:
        yield DaskExecutor(client.scheduler.address)