import sys

from prefect import Flow, task
from prefect.environments.storage import S3
from prefect.run_configs import KubernetesRun


@task
def exit_me():
    """Call ``sys.exit(1)`` so the task ends the process with exit status 1."""
    sys.exit(1)


# A flow whose only task is ``exit_me``.
f = Flow("high-resource-request", tasks=[exit_me])

# Store the flow in the "my-prefect-flows" S3 bucket, referencing the
# "AWS_CREDENTIALS" secret.
f.storage = S3(
    bucket="my-prefect-flows",
    secrets=["AWS_CREDENTIALS"],
)

# Run as a Kubernetes job with a CPU request of 40 and a memory request
# of "40Gi".
f.run_config = KubernetesRun(
    cpu_request=40,
    memory_request="40Gi",
)

# NOTE(review): "Demo" is passed positionally; presumably the project name --
# confirm against the installed Prefect version's ``Flow.register`` signature.
f.register("Demo")
def register_flow_with_saturn(
    self,
    flow: Flow,
    dask_cluster_kwargs: Optional[Dict[str, Any]] = None,
    dask_adapt_kwargs: Optional[Dict[str, Any]] = None,
    instance_size: Optional[str] = None,
) -> Flow:
    """
    Configure a flow with everything it needs to run on a Saturn Dask
    cluster, and return it.

    :param flow: A Prefect ``Flow`` object
    :param dask_cluster_kwargs: Dictionary of keyword arguments for the
        ``dask_saturn.SaturnCluster`` constructor. If ``None`` (the
        default), ``{"n_workers": 1, "autoclose": False}`` is used. A
        non-empty dictionary is merged on top of those defaults, with the
        caller's values taking precedence. An empty dictionary is passed
        through unchanged, so no defaults are applied.
    :param dask_adapt_kwargs: Dictionary of keyword arguments to pass to
        ``dask_saturn.SaturnCluster.adapt()``. If ``None`` (the default),
        adaptive scaling will not be used.
    :param instance_size: Instance size for the flow run. Does not affect
        the size of dask workers. If ``None``, the smallest available size
        will be used.
    :return: The same ``flow`` object that was passed in, modified in place.

    Prefect components
    ------------------

    This method modifies the following components of ``Flow`` objects
    passed to it.

    * ``.storage``: a ``Webhook`` storage instance is added

    If using ``prefect<0.14.0``

    * ``.environment``: a ``KubernetesJobEnvironment`` with a
      ``DaskExecutor`` is added. This environment will use the same image
      as the notebook from which this code is run.

    If using ``prefect>=0.14.0``

    * ``run_config``: a ``KubernetesRun`` is added, which by default will
      use the same image, start script, and environment variables as the
      notebook from which this code is run.
    * ``executor``: a ``DaskExecutor``, which uses the same image as the
      notebook from which this code is run.

    Adaptive scaling is off by default
    ----------------------------------

    Dask's
    `adaptive scaling <https://docs.dask.org/en/latest/setup/adaptive.html>`_
    can improve resource utilization by allowing Dask to spin things up
    and down based on your workload.

    This is off by default in the ``DaskExecutor`` created by
    ``prefect-saturn`` because in some cases, the interaction between Dask
    and Prefect can lead adaptive scaling to make choices that interfere
    with the way Prefect executes flows.

    Dask cluster is not closed at the end of each flow run
    ------------------------------------------------------

    The first time a flow runs in Saturn, it will look for a specific Dask
    cluster. If that cluster isn't found, it will start one. By default,
    the Dask cluster will not be shut down when the flow is done running.
    All runs of one flow are executed on the same Saturn Dask cluster.
    Autoclosing is off by default to avoid the situation where you have
    two runs of the same flow happening at the same time, and one flow
    kills the Dask cluster the other flow is still running on.

    If you are not worried about concurrent flow runs and want to know
    that the Dask cluster will be shut down at the end of each flow run,
    you can override this default behavior with the ``autoclose`` key of
    ``dask_cluster_kwargs``. Setting it to ``True`` will tell Saturn to
    close down the Dask cluster at the end of a flow run.

    .. code-block:: python

        flow = integration.register_flow_with_saturn(
            flow=flow,
            dask_cluster_kwargs={
                "n_workers": 4,
                "autoclose": True
            }
        )

    Instance size
    -------------

    Use ``prefect_saturn.describe_sizes()`` to get the available
    instance_size options. The returned dict maps instance_size to a short
    description of the resources available on that size
    (e.g. {"medium": "Medium - 2 cores - 4 GB RAM", ...})
    """
    # Resolve the cluster options: ``None`` means "defaults only"; a
    # non-empty dict is layered over the defaults; an explicitly empty dict
    # is handed on untouched.
    if dask_cluster_kwargs is None:
        cluster_options = {"n_workers": 1, "autoclose": False}
    elif dask_cluster_kwargs != {}:
        cluster_options = {
            "n_workers": 1,
            "autoclose": False,
            **dask_cluster_kwargs,
        }
    else:
        cluster_options = dask_cluster_kwargs

    adapt_options = {} if dask_adapt_kwargs is None else dask_adapt_kwargs

    self._set_flow_metadata(flow, instance_size=instance_size)
    flow.storage = self._get_storage()

    if not RUN_CONFIG_AVAILABLE:
        # No run-config support available: attach an environment instead.
        flow.environment = self._get_environment(
            cluster_kwargs=cluster_options,
            adapt_kwargs=adapt_options,
        )
        return flow

    # Run-config support available: set the executor and run config
    # separately.
    flow.executor = DaskExecutor(
        cluster_class="dask_saturn.SaturnCluster",
        cluster_kwargs=cluster_options,
        adapt_kwargs=adapt_options,
    )
    flow.run_config = KubernetesRun(
        job_template=self._flow_run_job_spec,
        labels=self._saturn_flow_labels,
        image=self._saturn_image,
    )
    return flow