def execute(  # type: ignore
    self, storage: "Storage", flow_location: str, **kwargs: Any  # type: ignore
) -> None:
    """
    Execute a flow run on a dynamically created Dask cluster.

    Optionally lets the ``_on_execute`` callback resize ``_provider_kwargs``
    based on this run's parameters, defaults the cluster image to the Flow's
    own Docker storage image, creates the Dask cluster, and then delegates to
    ``super().execute``.

    Args:
        - storage (Storage): the Storage object that contains the flow
        - flow_location (str): the location of the Flow within storage
        - **kwargs (Any): additional keyword arguments, passed through to
          ``super().execute``
    """
    flow_run_info = None
    flow_run_id = prefect.context.get("flow_run_id")
    if self._on_execute:
        # If an on_execute Callable has been provided, retrieve the flow run
        # parameters and then allow the Callable a chance to update
        # _provider_kwargs. This allows better sizing of the cluster resources
        # based on parameters for this Flow run.
        try:
            client = Client()
            flow_run_info = client.get_flow_run_info(flow_run_id)
            parameters = flow_run_info.parameters or {}  # type: ignore
            self._on_execute(parameters, self._provider_kwargs)
        except Exception as exc:
            # Best-effort: cluster sizing is an optimization, so log and continue.
            self.logger.info(
                "Failed to retrieve flow run info with error: {}".format(repr(exc))
            )
    # dict.get returns a falsy None for a missing key, so this single
    # truthiness check covers both "key absent" and "key present but empty".
    if not self._provider_kwargs.get("image"):
        # If image is not specified, use the Flow's image so that dependencies
        # are identical on all containers: Flow runner, Dask scheduler, and
        # Dask workers
        flow_id = prefect.context.get("flow_id")
        try:
            client = Client()
            if not flow_id:  # We've observed cases where flow_id is None
                if not flow_run_info:
                    flow_run_info = client.get_flow_run_info(flow_run_id)
                flow_id = flow_run_info.flow_id
            flow_info = client.graphql(
                """query { flow(where: {id: {_eq: "%s"}}) { storage } }""" % flow_id
            )
            storage_info = flow_info["data"]["flow"][0]["storage"]
            image = "{}/{}:{}".format(
                storage_info["registry_url"],
                storage_info["image_name"],
                storage_info["image_tag"],
            )
            self.logger.info(
                "Using Flow's Docker image for Dask scheduler & workers: {}".format(
                    image
                )
            )
            self._provider_kwargs["image"] = image
        except Exception as exc:
            # Best-effort: fall back to the provider's default image on failure.
            self.logger.info(
                "Failed to retrieve flow info with error: {}".format(repr(exc))
            )
    # Start the cluster, then hand off to the parent implementation, which
    # connects via the scheduler address recorded in executor_kwargs.
    self._create_dask_cluster()
    self.logger.info(
        "Executing on dynamically created Dask Cluster with scheduler address: {}".format(
            self.executor_kwargs["address"]
        )
    )
    super().execute(storage, flow_location, **kwargs)
def run(
    self,
    flow_name: str = None,
    project_name: str = None,
    parameters: dict = None,
    run_config: RunConfig = None,
    new_flow_context: dict = None,
    run_name: str = None,
    idempotency_key: str = None,
    scheduled_start_time: datetime.datetime = None,
) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not
            provided, this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is located;
            if not provided, this method will use the project provided at initialization.
            If running with Prefect Core's server as the backend, this should not be
            provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization
        - run_config (RunConfig, optional): a run-config to use for this flow
            run, overriding any existing flow settings.
        - new_flow_context (dict, optional): the optional run context for the new flow run
        - run_name (str, optional): name to be set for the flow run
        - idempotency_key (str, optional): a unique idempotency key for scheduling the
            flow run. Duplicate flow runs with the same idempotency key will only create
            a single flow run. This is useful for ensuring that only one run is created
            if this task is retried. If not provided, defaults to the active `task_run_id`.
        - scheduled_start_time (datetime, optional): the time to schedule the execution
            for; if not provided, defaults to now

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project name was
            not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import StartFlowRun

        kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
        ```
    """
    # Fall back to the values provided at initialization, as the docstring
    # promises; previously the ValueError below fired even when the task was
    # initialized with a flow/project name.
    flow_name = flow_name or self.flow_name
    project_name = project_name or self.project_name

    # verify that flow and project names were passed where necessary
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None:
        raise ValueError("Must provide a project name.")

    where_clause = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": project_name}},
    }

    # find the flow ID to schedule; order by version descending with a limit
    # of 1 so only the most recent version is returned
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    flow = client.graphql(query).data.flow

    # verify that a flow has been returned
    if not flow:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # grab the ID for the most recent version
    flow_id = flow[0].id

    if idempotency_key is None:
        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        idempotency_key = prefect.context.get("task_run_id", None)

    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        run_config=run_config,
        idempotency_key=idempotency_key,
        context=new_flow_context,
        run_name=run_name,
        scheduled_start_time=scheduled_start_time,
    )

    self.logger.debug(f"Flow Run {flow_run_id} created.")

    self.logger.debug(f"Creating link artifact for Flow Run {flow_run_id}.")
    run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
    create_link(urlparse(run_link).path)
    self.logger.info(f"Flow Run: {run_link}")

    if not self.wait:
        return flow_run_id

    # Poll until the child flow run reaches a finished state, then raise the
    # signal derived from that state so this task's final state mirrors the
    # child run's outcome.
    while True:
        time.sleep(self.poll_interval.total_seconds())
        flow_run_state = client.get_flow_run_info(flow_run_id).state
        if flow_run_state.is_finished():
            exc = signal_from_state(flow_run_state)(
                f"{flow_run_id} finished in state {flow_run_state}"
            )
            raise exc
def execute(  # type: ignore
    self, flow: "Flow", **kwargs: Any  # type: ignore
) -> None:
    """
    Execute a flow run on a dask-cloudprovider cluster.

    Optionally lets the ``_on_execute`` callback resize ``_provider_kwargs``
    based on this run's parameters, defaults the cluster image to the Flow's
    own Docker storage image, creates the Dask cluster, and then runs the flow
    with a ``DaskExecutor`` pointed at that cluster.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): Unused
    """
    flow_run_info = None
    flow_run_id = prefect.context.get("flow_run_id")
    if self._on_execute:
        # If an on_execute Callable has been provided, retrieve the flow run
        # parameters and then allow the Callable a chance to update
        # _provider_kwargs. This allows better sizing of the cluster resources
        # based on parameters for this Flow run.
        try:
            client = Client()
            flow_run_info = client.get_flow_run_info(flow_run_id)
            parameters = flow_run_info.parameters or {}  # type: ignore
            self._on_execute(parameters, self._provider_kwargs)
        except Exception as exc:
            # Best-effort: cluster sizing is an optimization, so log and continue.
            self.logger.info(
                "Failed to retrieve flow run info with error: {}".format(repr(exc))
            )
    # dict.get returns a falsy None for a missing key, so this single
    # truthiness check covers both "key absent" and "key present but empty".
    if not self._provider_kwargs.get("image"):
        # If image is not specified, use the Flow's image so that dependencies
        # are identical on all containers: Flow runner, Dask scheduler, and
        # Dask workers
        flow_id = prefect.context.get("flow_id")
        try:
            client = Client()
            if not flow_id:  # We've observed cases where flow_id is None
                if not flow_run_info:
                    flow_run_info = client.get_flow_run_info(flow_run_id)
                flow_id = flow_run_info.flow_id
            flow_info = client.graphql(
                """query { flow(where: {id: {_eq: "%s"}}) { storage } }""" % flow_id
            )
            storage_info = flow_info["data"]["flow"][0]["storage"]
            image = "{}/{}:{}".format(
                storage_info["registry_url"],
                storage_info["image_name"],
                storage_info["image_tag"],
            )
            self.logger.info(
                "Using Flow's Docker image for Dask scheduler & workers: {}".format(
                    image
                )
            )
            self._provider_kwargs["image"] = image
        except Exception as exc:
            # Best-effort: fall back to the provider's default image on failure.
            self.logger.info(
                "Failed to retrieve flow info with error: {}".format(repr(exc))
            )
    self._create_dask_cluster()
    self.logger.info(
        "Executing on dynamically created Dask Cluster with scheduler address: {}".format(
            self.executor_kwargs["address"]
        )
    )
    if self.on_start:
        self.on_start()
    try:
        # Imported lazily so the environment module does not require the
        # engine/executor machinery at import time.
        from prefect.engine import get_default_flow_runner_class
        from prefect.executors import DaskExecutor

        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=DaskExecutor(**self.executor_kwargs))
    except Exception as exc:
        self.logger.exception(
            "Unexpected error raised during flow run: {}".format(exc)
        )
        raise
    finally:
        # on_exit runs whether the flow run succeeded or raised.
        if self.on_exit:
            self.on_exit()