import os
from unittest.mock import MagicMock

import prefect
from prefect import Flow
from prefect.environments import Environment
from prefect.environments.storage import Local
from prefect.utilities.configuration import set_temporary_config
from prefect.utilities.graphql import GraphQLResult

# NOTE: import path inferred from the monkeypatched target below
from prefect.environments.execution import load_and_run_flow


def test_load_and_run_flow(monkeypatch, tmpdir):
    myflow = Flow("test-flow")

    # This is gross. Since the flow is pickled/unpickled, there's no easy way
    # to access the same object to set a flag. Resort to setting an environment
    # variable as a global flag that won't get copied eagerly through
    # cloudpickle.
    monkeypatch.setenv("TEST_RUN_CALLED", "FALSE")

    class MyEnvironment(Environment):
        def run(self, flow):
            assert flow is myflow
            os.environ["TEST_RUN_CALLED"] = "TRUE"

    myflow.environment = MyEnvironment()

    storage = Local(str(tmpdir))
    myflow.storage = storage
    storage.add_flow(myflow)

    gql_return = MagicMock(
        return_value=MagicMock(
            data=MagicMock(
                flow_run=[
                    GraphQLResult(
                        {
                            "flow": GraphQLResult(
                                {"name": myflow.name, "storage": storage.serialize()}
                            )
                        }
                    )
                ],
            )
        )
    )
    client = MagicMock()
    client.return_value.graphql = gql_return
    monkeypatch.setattr("prefect.environments.execution.base.Client", client)

    with set_temporary_config({"cloud.auth_token": "test"}), prefect.context(
        {"flow_run_id": "id"}
    ):
        load_and_run_flow()
    assert os.environ["TEST_RUN_CALLED"] == "TRUE"
Example #2
    def register_flow_with_saturn(
        self,
        flow: Flow,
        dask_cluster_kwargs: Optional[Dict[str, Any]] = None,
        dask_adapt_kwargs: Optional[Dict[str, Any]] = None,
        instance_size: Optional[str] = None,
    ) -> Flow:
        """
        Given a flow, set up all the details needed to run it on
        a Saturn Dask cluster.

        :param flow: A Prefect ``Flow`` object
        :param dask_cluster_kwargs: Dictionary of keyword arguments
            to the ``dask_saturn.SaturnCluster`` constructor. If ``None``
            (the default), the cluster will be created with
            one worker (``{"n_workers": 1}``).
        :param dask_adapt_kwargs: Dictionary of keyword arguments
            to pass to ``dask_saturn.SaturnCluster.adapt()``. If
            ``None`` (the default), adaptive scaling will not be used.
        :param instance_size: Instance size for the flow run. Does not affect
            the size of Dask workers. If ``None``, the smallest available size
            will be used.

        Prefect components
        ------------------

        This method modifies the following components of ``Flow`` objects
        passed to it.

        * ``.storage``: a ``Webhook`` storage instance is added

        If using ``prefect<0.14.0``:

        * ``.environment``: a ``KubernetesJobEnvironment`` with a ``DaskExecutor``
            is added. This environment will use the same image as the notebook
            from which this code is run.

        If using ``prefect>=0.14.0``:

        * ``run_config``: a ``KubernetesRun`` is added, which by default will use
            the same image, start script, and environment variables as the notebook
            from which this code is run.
        * ``executor``: a ``DaskExecutor``, which uses the same image as the notebook
            from which this code is run.

        Adaptive scaling is off by default
        ----------------------------------

        Dask's `adaptive scaling <https://docs.dask.org/en/latest/setup/adaptive.html>`_
        can improve resource utilization by allowing Dask to spin things up
        and down based on your workload.

        This is off by default in the ``DaskExecutor`` created by ``prefect-saturn``
        because in some cases, the interaction between Dask and Prefect can lead
        adaptive scaling to make choices that interfere with the way Prefect executes
        flows.
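
        Adaptive scaling can still be turned on through ``dask_adapt_kwargs``.
        A minimal sketch, assuming ``dask_saturn.SaturnCluster.adapt()`` accepts
        the standard Dask ``minimum`` / ``maximum`` arguments:

        .. code-block:: python

            flow = integration.register_flow_with_saturn(
                flow=flow,
                dask_adapt_kwargs={
                    "minimum": 1,
                    "maximum": 3
                }
            )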

        Dask cluster is not closed at the end of each flow run
        ------------------------------------------------------

        The first time a flow runs in Saturn, it will look for a specific Dask cluster. If
        that cluster isn't found, it will start one. By default, the Dask cluster will not
        be shut down when the flow is done running. All runs of one flow are executed on the
        same Saturn Dask cluster. Autoclosing is off by default to avoid the situation
        where you have two runs of the same flow happening at the same time, and one flow
        kills the Dask cluster the other flow is still running on.

        If you are not worried about concurrent flow runs and want to know that the Dask
        cluster will be shut down at the end of each flow run, you can override this default
        behavior with the parameter ``autoclose``. Setting this to ``True`` will tell Saturn
        to close down the Dask cluster at the end of a flow run.

        .. code-block:: python

            flow = integration.register_flow_with_saturn(
                flow=flow,
                dask_cluster_kwargs={
                    "n_workers": 4,
                    "autoclose": True
                }
            )

        Instance size
        -------------

        Use ``prefect_saturn.describe_sizes()`` to list the available
        ``instance_size`` options. The returned dict maps each ``instance_size``
        to a short description of the resources available on that size
        (e.g. ``{"medium": "Medium - 2 cores - 4 GB RAM", ...}``).
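
        For example (``"medium"`` here is a hypothetical size; use one of the
        keys returned by ``describe_sizes()`` in your Saturn installation):

        .. code-block:: python

            import prefect_saturn

            prefect_saturn.describe_sizes()
            # {"medium": "Medium - 2 cores - 4 GB RAM", ...}

            flow = integration.register_flow_with_saturn(
                flow=flow,
                instance_size="medium"
            )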
        """
        default_cluster_kwargs = {"n_workers": 1, "autoclose": False}

        # None -> use the defaults above; a non-empty dict is merged on top of
        # the defaults; an explicitly-passed empty dict is forwarded unchanged,
        # so SaturnCluster's own defaults apply in that case
        if dask_cluster_kwargs is None:
            dask_cluster_kwargs = default_cluster_kwargs
        elif dask_cluster_kwargs != {}:
            default_cluster_kwargs.update(dask_cluster_kwargs)
            dask_cluster_kwargs = default_cluster_kwargs

        if dask_adapt_kwargs is None:
            dask_adapt_kwargs = {}

        self._set_flow_metadata(flow, instance_size=instance_size)

        storage = self._get_storage()
        flow.storage = storage

        if RUN_CONFIG_AVAILABLE:
            flow.executor = DaskExecutor(
                cluster_class="dask_saturn.SaturnCluster",
                cluster_kwargs=dask_cluster_kwargs,
                adapt_kwargs=dask_adapt_kwargs,
            )
            flow.run_config = KubernetesRun(
                job_template=self._flow_run_job_spec,
                labels=self._saturn_flow_labels,
                image=self._saturn_image,
            )
        else:
            flow.environment = self._get_environment(
                cluster_kwargs=dask_cluster_kwargs, adapt_kwargs=dask_adapt_kwargs
            )

        return flow
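
For context, a minimal usage sketch of this method. It assumes the method lives on
prefect-saturn's PrefectCloudIntegration class and that the constructor takes the
Prefect Cloud project name; check the prefect-saturn docs for the exact signature.

import prefect
from prefect import Flow, task
from prefect_saturn import PrefectCloudIntegration


@task
def hello():
    prefect.context.get("logger").info("hello from Saturn")


flow = Flow("saturn-example", tasks=[hello])

# assumed constructor argument name; verify against your prefect-saturn version
integration = PrefectCloudIntegration(prefect_cloud_project_name="Demo")
flow = integration.register_flow_with_saturn(
    flow=flow,
    dask_cluster_kwargs={"n_workers": 2},
)

flow.register(project_name="Demo")
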
Example #3
from prefect import task, Flow
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import S3


@task
def get_value():
    return "Example!"


@task
def output_value(value):
    print(value)


flow = Flow("dk8s-debug", )

# set task dependencies using imperative API
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

flow.storage = S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"])
flow.environment = DaskKubernetesEnvironment(
    metadata={"image": "joshmeek18/flows:all_extras9"})
flow.register(project_name="Demo")
Example #4
import time

from prefect import task, Flow
from prefect.engine.executors import DaskExecutor
from prefect.environments import LocalEnvironment


class DaskReport:
    # Implements the Dask worker-plugin interface (setup/teardown). The original
    # snippet is truncated: the imports, class header, and flow_run_id attribute
    # here are reconstructed from how the plugin is used further down.
    flow_run_id = None  # assumed to be set to the current flow run's ID elsewhere

    def setup(self, worker=None):
        pass

    def teardown(self, worker=None):
        from prefect import Client

        msg = """
        Lost communication with Dask worker:
            {}
        """.format(worker)
        Client().write_run_logs([
            dict(
                flow_run_id=self.flow_run_id,
                name="DaskWorkerPlugin",
                message=msg,
                level="ERROR",
            )
        ])


@task
def sleep_me():
    time.sleep(60)


flow = Flow("plugin-test", tasks=[sleep_me])
flow.environment = LocalEnvironment(
    executor=DaskExecutor(address="localhost:8786", plugin=DaskReport))

flow.register(project_name="Demo")