def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using the default executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import (
            get_default_flow_runner_class,
            get_default_executor_class,
        )

        # Load the serialized flow from file and run it with the default executor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            runner_cls = get_default_flow_runner_class()
            # Instantiate the executor class before handing it to the runner
            executor = get_default_executor_class()()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
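For context, a minimal sketch of producing the serialized flow file that `run_flow` reads back; the flow name is a placeholder, and only the `cloudpickle.load` side appears in the snippet above, so the `dump` step here is an assumption about how the file is written.

import cloudpickle
from prefect import Flow

flow = Flow("example-flow")  # placeholder flow

# Write the flow in the same format run_flow reads with cloudpickle.load
with open("/root/.prefect/flow_env.prefect", "wb") as f:
    cloudpickle.dump(flow, f)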
def execute(self, flow: "Flow", **kwargs: Any) -> None:  # type: ignore
    """
    Run the provided flow here using the specified executor and executor kwargs.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): additional keyword arguments to pass to the runner
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import (
            get_default_executor_class,
            get_default_flow_runner_class,
        )

        # Run flow with default executor class
        with set_temporary_config({"engine.executor.default_class": self.executor}):
            executor = get_default_executor_class()(**self.executor_kwargs)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
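As a standalone illustration of the pattern above, here is a hedged sketch of scoping the default executor through `set_temporary_config`; the `LocalDaskExecutor` path and its `scheduler` kwarg are assumptions about Prefect 0.x, not taken from this snippet.

from prefect.utilities.configuration import set_temporary_config
from prefect.engine import get_default_executor_class

# Inside the block the default executor resolves to the override;
# outside it falls back to the value in the user's config.toml.
with set_temporary_config(
    {"engine.executor.default_class": "prefect.engine.executors.LocalDaskExecutor"}
):
    executor = get_default_executor_class()(scheduler="threads")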
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
            worker_pod = yaml.safe_load(pod_file)
            worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

            cluster = KubeCluster.from_dict(
                worker_pod, namespace=prefect.context.get("namespace")
            )
            cluster.adapt(minimum=1, maximum=1)

            # Load the serialized flow from file and run it with a DaskExecutor
            with open(
                prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
                "rb",
            ) as f:
                flow = cloudpickle.load(f)

                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)

            sys.exit(0)  # attempt to force resource cleanup
    except Exception as exc:
        self.logger.error("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    from prefect.engine import get_default_flow_runner_class
    from prefect.engine.executors import DaskExecutor
    from dask_kubernetes import KubeCluster

    with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
        worker_pod = yaml.safe_load(pod_file)
        worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

    cluster = KubeCluster.from_dict(worker_pod)
    cluster.adapt(minimum=1, maximum=1)

    # Load the serialized flow from file and run it with a DaskExecutor
    with open(
        prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
        "rb",
    ) as f:
        flow = cloudpickle.load(f)

        executor = DaskExecutor(address=cluster.scheduler_address)
        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=executor)
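For reference, a minimal, self-contained sketch of the runner-plus-executor pattern these run_flow variants share, assuming the Prefect 0.x API; the flow, task, and scheduler address are placeholders.

from prefect import Flow, task
from prefect.engine import get_default_flow_runner_class
from prefect.engine.executors import DaskExecutor

@task
def say_hello():
    print("hello")

with Flow("example") as flow:
    say_hello()

# Point the executor at an existing Dask scheduler (address is a placeholder)
executor = DaskExecutor(address="tcp://dask-scheduler:8786")
runner_cls = get_default_flow_runner_class()
state = runner_cls(flow=flow).run(executor=executor)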
def test_default_flow_runner_responds_to_config():
    with utilities.configuration.set_temporary_config(
        {"engine.flow_runner.default_class": "prefect.engine.cloud.CloudFlowRunner"}
    ):
        assert engine.get_default_flow_runner_class() is engine.cloud.CloudFlowRunner
def execute(  # type: ignore
    self, storage: "Storage", flow_location: str, **kwargs: Any
) -> None:
    """
    Run a flow from the `flow_location` here using the specified executor and
    executor kwargs.

    Args:
        - storage (Storage): the storage object that contains information relating
            to where and how the flow is stored
        - flow_location (str): the location of the Flow to execute
        - **kwargs (Any): additional keyword arguments to pass to the runner
    """
    try:
        from prefect.engine import (
            get_default_executor_class,
            get_default_flow_runner_class,
        )

        # Load the serialized flow from file and run it with the configured executor
        with open(flow_location, "rb") as f:
            flow = cloudpickle.load(f)

        with set_temporary_config({"engine.executor.default_class": self.executor}):
            executor = get_default_executor_class()(**self.executor_kwargs)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def execute(self, flow: "Flow", **kwargs: Any) -> None:
    """
    Executes the provided flow in the local process using the default flow runner.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): additional keyword arguments to pass to the runner
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class

        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(**kwargs)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def execute(self, storage: "Storage", flow_location: str, **kwargs: Any) -> None:
    """
    Executes the flow for this environment from the storage parameter, by calling
    `get_flow` on the storage; if that fails, `get_env_runner` will be used with
    the OS environment variables inherited from this process.

    Args:
        - storage (Storage): the Storage object that contains the flow
        - flow_location (str): the location of the Flow to execute
        - **kwargs (Any): additional keyword arguments to pass to the runner
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    env = kwargs.pop("env", dict())
    try:
        from prefect.engine import get_default_flow_runner_class

        flow = storage.get_flow(flow_location)
        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(**kwargs)
    except NotImplementedError:
        # Fall back to the storage-provided environment runner, passing the
        # current process environment plus any user-supplied overrides
        env_runner = storage.get_env_runner(flow_location)
        current_env = os.environ.copy()
        current_env.update(env)
        env_runner(env=current_env)
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def test_default_flow_runner_with_bad_config():
    with utilities.configuration.set_temporary_config(
        {"engine.flow_runner.default_class": "prefect.engine. bad import path"}
    ):
        with pytest.warns(UserWarning):
            assert (
                engine.get_default_flow_runner_class() is engine.flow_runner.FlowRunner
            )
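To make the two tests above concrete, here is a hedged sketch of how a dotted-path config value can be resolved to a class with a warning-plus-fallback on failure; the `resolve_class` helper is hypothetical and only mirrors the behavior the tests exercise, it is not Prefect's actual implementation.

import importlib
import warnings

def resolve_class(dotted_path: str, default: type) -> type:
    # Hypothetical helper: split "pkg.module.Name", import the module, and
    # fetch the attribute; warn and fall back to the default on any failure.
    try:
        module_path, _, name = dotted_path.rpartition(".")
        return getattr(importlib.import_module(module_path), name)
    except Exception:
        warnings.warn("Could not import {}; using default.".format(dotted_path))
        return default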
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using a Dask executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        # Load the serialized flow from file and run it with a DaskExecutor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            # Populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in flow.storage.secrets:
                secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run()

            with prefect.context(secrets=secrets):
                executor = DaskExecutor(address=cluster.scheduler_address)
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
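A standalone sketch of the secret-population pattern that recurs in these snippets, assuming the Prefect 0.x secrets API; the secret name in the list is a placeholder.

import prefect
from prefect.tasks.secrets import PrefectSecret

# Resolve each declared secret once and place it in context so downstream
# tasks can read it without extra API round-trips.
secrets = prefect.context.get("secrets", {})
for name in ["MY_SECRET"]:  # placeholder secret names
    secrets[name] = PrefectSecret(name=name).run()

with prefect.context(secrets=secrets):
    pass  # flow execution would happen here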
def _execute_flow_run():
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception("Not currently executing a flow within a Cloud context.")

    query = {
        "query": {
            with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                "flow": {"name": True, "storage": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    try:
        flow_data = flow_run[0].flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets, loading_flow=True):
            flow = storage.get_flow(storage.flows[flow_data.name])

        with prefect.context(secrets=secrets):
            if getattr(flow, "run_config", None) is not None:
                runner_cls = get_default_flow_runner_class()
                runner_cls(flow=flow).run()
            else:
                environment = flow.environment
                environment.setup(flow)
                environment.execute(flow)
    except Exception as exc:
        msg = "Failed to load and execute Flow's environment: {}".format(repr(exc))
        state = prefect.engine.state.Failed(message=msg)
        client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
        click.echo(str(exc))
        raise exc
def run(self, flow: "Flow") -> None:
    """
    Run the flow using a temporary dask-kubernetes cluster.

    Args:
        - flow (Flow): the flow to run.
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        executor = DaskExecutor(address=cluster.scheduler_address)
        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def run_flow(self) -> None:
    """
    Run the flow from the specified flow_file_path location using the default executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import (
            get_default_flow_runner_class,
            get_default_executor_class,
        )

        # Load the serialized flow from file and run it with the executor
        with open(
            prefect.context.get("flow_file_path", "/root/.prefect/flow_env.prefect"),
            "rb",
        ) as f:
            flow = cloudpickle.load(f)

            # Populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in flow.storage.secrets:
                secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run()

            with prefect.context(secrets=secrets):
                runner_cls = get_default_flow_runner_class()
                executor = get_default_executor_class()(**self.executor_kwargs)
                runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def execute(self, flow: "Flow", **kwargs: Any) -> None:
    """
    Executes the flow in the local process.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): additional keyword arguments to pass to the runner
    """
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class

        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=self.executor, **kwargs)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        if self.on_exit:
            self.on_exit()
def run(self, flow: "Flow") -> None:
    """
    Run the flow using this environment.

    Args:
        - flow (Flow): the flow object
    """
    assert isinstance(self, Environment)  # mypy

    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class

        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=self.executor)  # type: ignore
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        if self.on_exit:
            self.on_exit()
def execute(  # type: ignore
    self, flow: "Flow", **kwargs: Any  # type: ignore
) -> None:
    """
    Execute a flow run on a dask-cloudprovider cluster.

    Args:
        - flow (Flow): the Flow object
        - **kwargs (Any): Unused
    """
    flow_run_info = None
    flow_run_id = prefect.context.get("flow_run_id")
    if self._on_execute:
        # If an on_execute Callable has been provided, retrieve the flow run parameters
        # and then allow the Callable a chance to update _provider_kwargs. This allows
        # better sizing of the cluster resources based on parameters for this Flow run.
        try:
            client = Client()
            flow_run_info = client.get_flow_run_info(flow_run_id)
            parameters = flow_run_info.parameters or {}  # type: ignore
            self._on_execute(parameters, self._provider_kwargs)
        except Exception as exc:
            self.logger.info(
                "Failed to retrieve flow run info with error: {}".format(repr(exc))
            )
    if "image" not in self._provider_kwargs or not self._provider_kwargs.get("image"):
        # If image is not specified, use the Flow's image so that dependencies are
        # identical on all containers: Flow runner, Dask scheduler, and Dask workers
        flow_id = prefect.context.get("flow_id")
        try:
            client = Client()
            if not flow_id:  # We've observed cases where flow_id is None
                if not flow_run_info:
                    flow_run_info = client.get_flow_run_info(flow_run_id)
                flow_id = flow_run_info.flow_id
            flow_info = client.graphql(
                """query {
                  flow(where: {id: {_eq: "%s"}}) {
                    storage
                  }
                }"""
                % flow_id
            )
            storage_info = flow_info["data"]["flow"][0]["storage"]
            image = "{}/{}:{}".format(
                storage_info["registry_url"],
                storage_info["image_name"],
                storage_info["image_tag"],
            )
            self.logger.info(
                "Using Flow's Docker image for Dask scheduler & workers: {}".format(image)
            )
            self._provider_kwargs["image"] = image
        except Exception as exc:
            self.logger.info(
                "Failed to retrieve flow info with error: {}".format(repr(exc))
            )

    self._create_dask_cluster()

    self.logger.info(
        "Executing on dynamically created Dask Cluster with scheduler address: {}".format(
            self.executor_kwargs["address"]
        )
    )

    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.executors import DaskExecutor

        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=DaskExecutor(**self.executor_kwargs))
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise
    finally:
        if self.on_exit:
            self.on_exit()
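To show how the `_on_execute` hook above might be used, here is a hypothetical callback that resizes the cluster from a flow run parameter; the `workers` parameter name is made up for illustration, and `n_workers` is assumed to be a supported dask-cloudprovider cluster kwarg.

# Hypothetical on_execute callback: adjust provider kwargs from run parameters.
def size_cluster(parameters: dict, provider_kwargs: dict) -> None:
    # "workers" is an illustrative flow parameter, not part of the snippet above
    if "workers" in parameters:
        provider_kwargs["n_workers"] = int(parameters["workers"])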
def test_default_flow_runner():
    assert engine.get_default_flow_runner_class() is engine.flow_runner.FlowRunner
def run_flow(self) -> None:
    """
    Run the flow using the default executor

    Raises:
        - ValueError: if no `flow_run_id` is found in context
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import (
            get_default_flow_runner_class,
            get_default_executor_class,
        )

        flow_run_id = prefect.context.get("flow_run_id")
        if not flow_run_id:
            raise ValueError("No flow run ID found in context.")

        query = {
            "query": {
                with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                    "flow": {"name": True, "storage": True},
                }
            }
        }

        client = Client()
        result = client.graphql(query)
        flow_run = result.data.flow_run[0]

        flow_data = flow_run.flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # Populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets):
            flow = storage.get_flow(storage.flows[flow_data.name])
            runner_cls = get_default_flow_runner_class()
            # Instantiate the executor in both branches before passing it to run()
            if hasattr(self, "executor_kwargs"):
                executor = get_default_executor_class()(**self.executor_kwargs)  # type: ignore
            else:
                executor = get_default_executor_class()()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()
def flow_run():
    """
    Execute a flow run in the context of a backend API.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception("Not currently executing a flow within a Cloud context.")

    query = {
        "query": {
            with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                "flow": {"name": True, "storage": True, "run_config": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    # Set the `running_with_backend` context variable to enable logging
    with prefect.context(running_with_backend=True):
        try:
            flow_data = flow_run[0].flow
            storage_schema = prefect.serialization.storage.StorageSchema()
            storage = storage_schema.load(flow_data.storage)

            # populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in storage.secrets:
                secrets[secret] = PrefectSecret(name=secret).run()

            with prefect.context(secrets=secrets, loading_flow=True):
                flow = storage.get_flow(flow_data.name)

            with prefect.context(secrets=secrets):
                if flow_data.run_config is not None:
                    runner_cls = get_default_flow_runner_class()
                    runner_cls(flow=flow).run()
                else:
                    environment = flow.environment
                    environment.setup(flow)
                    environment.execute(flow)
        except Exception as exc:
            msg = "Failed to load and execute Flow's environment: {}".format(repr(exc))
            state = prefect.engine.state.Failed(message=msg)
            client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
            client.write_run_logs(
                dict(
                    flow_run_id=flow_run_id,  # type: ignore
                    name="execute flow-run",
                    message=msg,
                    level="ERROR",
                )
            )
            click.echo(str(exc))
            raise exc
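For readers unfamiliar with `with_args`, the `query` dictionary built above renders to roughly the following GraphQL; the flow run id value is a placeholder.

# Approximate GraphQL produced by the `query` dict above:
#
# query {
#   flow_run(where: {id: {_eq: "<flow-run-id>"}}) {
#     flow { name storage run_config }
#     version
#   }
# }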
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        executor = DaskExecutor(address=cluster.scheduler_address)
        runner_cls = get_default_flow_runner_class()
        runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()

def _extra_loggers(self) -> str:
    """
    Set dask-kubernetes related loggers for debugging and providing more
    visibility into the workings of the Dask cluster. These loggers are useful
def run_flow(self) -> None:
    """
    Run the flow using a Dask executor
    """
    # Call on_start callback if specified
    if self.on_start:
        self.on_start()

    try:
        from prefect.engine import get_default_flow_runner_class
        from prefect.engine.executors import DaskExecutor
        from dask_kubernetes import KubeCluster

        if self._worker_spec:
            worker_pod = self._worker_spec
            worker_pod = self._populate_worker_spec_yaml(yaml_obj=worker_pod)
        else:
            with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file:
                worker_pod = yaml.safe_load(pod_file)
                worker_pod = self._populate_worker_pod_yaml(yaml_obj=worker_pod)

        cluster = KubeCluster.from_dict(
            worker_pod, namespace=prefect.context.get("namespace")
        )
        cluster.adapt(minimum=self.min_workers, maximum=self.max_workers)

        flow_run_id = prefect.context.get("flow_run_id")
        if not flow_run_id:
            raise ValueError("No flow run ID found in context.")

        query = {
            "query": {
                with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                    "flow": {"name": True, "storage": True},
                }
            }
        }

        client = Client()
        result = client.graphql(query)
        flow_run = result.data.flow_run[0]

        flow_data = flow_run.flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # Populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets):
            flow = storage.get_flow(storage.flows[flow_data.name])
            executor = DaskExecutor(address=cluster.scheduler_address)
            runner_cls = get_default_flow_runner_class()
            runner_cls(flow=flow).run(executor=executor)
    except Exception as exc:
        self.logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
    finally:
        # Call on_exit callback if specified
        if self.on_exit:
            self.on_exit()