async def insert( self, graphql_type: str, objects: List[dict], on_conflict: dict = None, alias: str = None, selection_set: GQLObjectTypes = "affected_rows", run_mutation: bool = True, ) -> Box: """ Runs an `insert` mutation against the provided Hasura type, evaluating the provided `selection_set` and returning the full result. The `selection_set` is inserted directly into the graphql query, and should not be surrounded by curly braces. Valid top-level keys are `affected_rows` and `returning`. """ if not isinstance(objects, (list, set, tuple)): raise TypeError( f"`objects` should be a collection; received {type(objects).__name__}" ) alias = alias or "insert" # ----------------------------------------------------------- # create variables arguments = {} variables = [] # --- variable: objects arguments["objects"] = Variable( name=f"{alias}_objects", type=f"[{graphql_type}_insert_input!]!", value=objects, ) variables.append(arguments["objects"]) # --- variable: on conflict if isinstance(on_conflict, str): arguments["on_conflict"] = EnumValue(on_conflict) elif on_conflict: arguments["on_conflict"] = Variable( name=f"{alias}_on_conflict", type=f"{graphql_type}_on_conflict", value=on_conflict, ) variables.append(arguments["on_conflict"]) # ------------------------------------------------------------- # build mutation mutation_name = f"{alias}: insert_{graphql_type}" selection_set = selection_set or "affected_rows" graphql = dict( query={with_args(mutation_name, arguments): selection_set}, variables=variables, ) if run_mutation: result = await self.execute_mutations_in_transaction( mutations=[graphql]) return result.data[alias] else: return graphql
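A usage sketch for the mutation helper above; `hasura` is a hypothetical instance of the client class this coroutine belongs to, and the table, constraint, and column names are illustrative:

# `hasura` is a hypothetical client instance; names below are illustrative
result = await hasura.insert(
    graphql_type="flow_run",
    objects=[{"flow_id": "some-flow-id", "parameters": {}}],
    # upsert: on a primary-key collision, update the scheduled start time
    # (plain strings, since the value travels as a GraphQL variable)
    on_conflict={
        "constraint": "flow_run_pkey",
        "update_columns": ["scheduled_start_time"],
    },
    selection_set={"returning": {"id"}},
)
new_ids = [row.id for row in result.returning]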
def logs(name, info): """ Query logs for a flow run. \b Options: --name, -n TEXT A flow run name to query [required] --info, -i Retrieve detailed logging info """ log_query = { with_args("logs", { "order_by": { EnumValue("timestamp"): EnumValue("asc") } }): { "timestamp": True, "message": True, "level": True, }, "start_time": True, } if info: log_query = { with_args("logs", { "order_by": { EnumValue("timestamp"): EnumValue("asc") } }): { "timestamp": True, "info": True }, "start_time": True, } query = { "query": { with_args( "flow_run", { "where": { "name": { "_eq": name } }, "order_by": { EnumValue("start_time"): EnumValue("desc") }, }, ): log_query } } result = Client().graphql(query) flow_run = result.data.flow_run if not flow_run: click.secho("{} not found".format(name), fg="red") return run = flow_run[0] logs = run.logs output = [] if not info: for log in logs: output.append([log.timestamp, log.level, log.message]) click.echo( tabulate( output, headers=["TIMESTAMP", "LEVEL", "MESSAGE"], tablefmt="plain", numalign="left", stralign="left", )) return for log in logs: click.echo(log.info)
def register(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    compressed: bool = True,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to register
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the schedule to
            inactive in the database to prevent auto-scheduling runs (if the Flow has a
            schedule). Defaults to `True`. This can be changed later.
        - version_group_id (str, optional): the UUID version group ID to use for versioning
            this Flow in Cloud; if not provided, the version group ID associated with this
            Flow's project and name will be used.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`

    Returns:
        - str: the ID of the newly-registered flow

    Raises:
        - ClientError: if the register failed
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        raise ClientError(
            "Flows with required parameters can not be scheduled automatically."
        )
    if any(e.key for e in flow.edges) and flow.result_handler is None:
        raise ClientError(
            "Flows are required to have a result handler for storing inputs and outputs."
        )
    if compressed:
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }
    query_project = {
        "query": {
            with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                "id": True
            }
        }
    }

    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'
            .format(project_name, project_name))

    serialized_flow = flow.serialize(build=build)  # type: Any

    # verify that the serialized flow can be deserialized
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".
            format(repr(exc)))

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        variables=dict(input=dict(
            projectId=project[0].id,
            serializedFlow=serialized_flow,
            setScheduleActive=set_schedule_active,
            versionGroupId=version_group_id,
        )),
    )  # type: Any

    flow_id = (res.data.createFlowFromCompressedString.id
               if compressed else res.data.createFlow.id)
    return flow_id
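A minimal registration sketch against the method above, assuming a project named "examples" already exists and authentication is configured; the flow itself is illustrative:

import prefect
from prefect import Flow, task

@task
def say_hello():
    print("hello")

with Flow("hello-flow") as flow:
    say_hello()

client = prefect.Client()
flow_id = client.register(flow=flow, project_name="examples")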
def _get_flow_run_metadata(
    self,
    flow_run_ids: Iterable[str],
) -> List["GraphQLResult"]:
    """
    Get metadata about a collection of flow run ids that the agent is preparing
    to submit

    This function will filter the flow runs to a collection where:
    - The flow run is in a 'Scheduled' state. This prevents flow runs that have been
      submitted by another agent from being submitted again.
    - The flow run is in another state, but has task runs in a 'Running' state
      scheduled to start now. This is for retries in which the flow run is placed
      back into the ready queue but is not in a Scheduled state.

    Args:
        flow_run_ids: Flow run ids to query (order will not be respected)

    Returns:
        List: Metadata per flow run sorted by scheduled start time (ascending)
    """
    if not flow_run_ids:
        return []

    flow_run_ids = list(flow_run_ids)
    self.logger.debug(
        f"Retrieving metadata for {len(flow_run_ids)} flow run(s)...")

    # This buffer allows flow runs to retry immediately in their own deployment
    # without the agent creating a second deployment
    retry_start_time_buffer = pendulum.now("UTC").subtract(
        seconds=3).isoformat()

    where = {
        # Only get flow runs in the requested set
        "id": {"_in": flow_run_ids},
        # and filter by the additional criteria...
        "_or": [
            # This flow run has not been taken by another agent
            {"state": {"_eq": "Scheduled"}},
            # Or, this flow run has been set to retry and has not been immediately
            # retried in its own process
            {
                "state": {"_eq": "Running"},
                "task_runs": {
                    "state_start_time": {"_lte": retry_start_time_buffer}
                },
            },
        ],
    }

    query = {
        "query": {
            with_args("flow_run", {"where": where}): {
                "id": True,
                "version": True,
                "state": True,
                "serialized_state": True,
                "parameters": True,
                "scheduled_start_time": True,
                "run_config": True,
                "name": True,
                "flow": {
                    "id",
                    "name",
                    "environment",
                    "storage",
                    "version",
                    "core_version",
                },
                # Collect and return task run metadata as well so the state can be
                # updated in `_mark_flow_as_submitted`
                with_args(
                    "task_runs",
                    {
                        "where": {
                            "state_start_time": {"_lte": retry_start_time_buffer}
                        }
                    },
                ): {"id", "version", "task_id", "serialized_state"},
            }
        }
    }

    result = self.client.graphql(query)
    return sorted(
        result.data.flow_run,
        key=lambda flow_run: flow_run.serialized_state.get(
            "start_time", pendulum.now("utc").isoformat()),
    )
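To see the GraphQL text the nested `where` filter above produces, it can be rendered with `parse_graphql`; the ids and timestamp below are placeholders:

from prefect.utilities.graphql import parse_graphql, with_args

where = {
    "id": {"_in": ["id-1", "id-2"]},  # placeholder flow run ids
    "_or": [
        {"state": {"_eq": "Scheduled"}},
        {
            "state": {"_eq": "Running"},
            "task_runs": {"state_start_time": {"_lte": "2021-01-01T00:00:00+00:00"}},
        },
    ],
}
print(parse_graphql({"query": {with_args("flow_run", {"where": where}): {"id"}}}))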
def flow_runs(limit, flow, project, started):
    """
    Query information regarding Prefect flow runs.

    \b
    Options:
        --limit, -l     INTEGER A limit amount of flow runs to query, defaults to 10
        --flow, -f      TEXT    Name of a flow to query for runs
        --project, -p   TEXT    Name of a project to query
        --started, -s           Only retrieve started flow runs, default shows `Scheduled` runs
    """

    if started:
        order = {"start_time": EnumValue("desc")}

        where = {
            "_and": {
                "flow": {"_and": {"name": {"_eq": flow}}},
                "start_time": {"_is_null": False},
            }
        }

        if project:
            where["_and"]["flow"]["_and"]["project"] = {"name": {"_eq": project}}
    else:
        order = {"created": EnumValue("desc")}

        where = {"flow": {"_and": {"name": {"_eq": flow}}}}

        if project:
            where["flow"]["_and"]["project"] = {"name": {"_eq": project}}

    query = {
        "query": {
            with_args("flow_run", {
                "where": where,
                "limit": limit,
                "order_by": order
            }): {
                "flow": {"name": True},
                "created": True,
                "state": True,
                "name": True,
                "duration": True,
                "start_time": True,
            }
        }
    }

    result = Client().graphql(query)

    flow_run_data = result.data.flow_run

    output = []
    for item in flow_run_data:
        start_time = (pendulum.parse(item.start_time).to_datetime_string()
                      if item.start_time else None)
        output.append([
            item.name,
            item.flow.name,
            item.state,
            pendulum.parse(item.created).diff_for_humans(),
            start_time,
            item.duration,
        ])

    click.echo(
        tabulate(
            output,
            headers=["NAME", "FLOW NAME", "STATE", "AGE", "START TIME", "DURATION"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        ))
async def schedule_flow_runs(flow_id: str, max_runs: int = 10) -> List[str]:
    """
    Schedule the next `max_runs` runs for this flow. Runs will not be scheduled
    if they are earlier than the latest currently-scheduled run that has
    auto_scheduled = True.

    Runs are created with an idempotency key to avoid rescheduling.

    Args:
        - flow_id (str): the flow ID
        - max_runs (int): the maximum number of runs to schedule (defaults to 10)

    Returns:
        - List[str]: the ids of the new runs
    """
    if flow_id is None:
        raise ValueError("Invalid flow id.")

    run_ids = []

    flow = await models.Flow.where({
        # match the provided ID
        "id": {"_eq": flow_id},
        # schedule is not none or flow group schedule is not none
        "_or": [
            {"schedule": {"_is_null": False}},
            {"flow_group": {"schedule": {"_is_null": False}}},
        ],
        # schedule is active
        "is_schedule_active": {"_eq": True},
        # flow is not archived
        "archived": {"_eq": False},
    }).first(
        {
            "schedule": True,
            "flow_group": {"schedule": True},
            with_args("flow_runs_aggregate",
                      {"where": {"auto_scheduled": {"_eq": True}}}): {
                "aggregate": {"max": "scheduled_start_time"}
            },
        },
        apply_schema=False,
    )

    if not flow:
        logger.debug(f"Flow {flow_id} can not be scheduled.")
        return run_ids
    else:
        # attempt to pull the schedule from the flow group if possible
        # if not possible, pull the schedule from the flow
        flow_schedule = flow.flow_group.schedule or flow.schedule
        try:
            flow_schedule = schedule_schema.load(flow_schedule)
        except Exception as exc:
            logger.error(exc, exc_info=True)
            logger.critical(
                f"Failed to deserialize schedule for flow {flow_id}: {flow_schedule}"
            )
            return run_ids

    if flow.flow_runs_aggregate.aggregate.max.scheduled_start_time is not None:
        last_scheduled_run = pendulum.parse(
            flow.flow_runs_aggregate.aggregate.max.scheduled_start_time)
    else:
        last_scheduled_run = pendulum.now("UTC")

    schedule_coros = []

    # schedule every event with an idempotent flow run
    for event in flow_schedule.next(n=max_runs, return_events=True):

        # if the event has parameter defaults or labels, we do allow for
        # same-time scheduling
        if event.parameter_defaults or event.labels is not None:
            md5 = hashlib.md5()
            param_string = str(sorted(json.dumps(event.parameter_defaults)))
            label_string = str(sorted(json.dumps(event.labels)))
            md5.update((param_string + label_string).encode("utf-8"))
            idempotency_key = (
                f"auto-scheduled:{event.start_time.in_tz('UTC')}:{md5.hexdigest()}"
            )
        # if this run was already scheduled, continue
        elif last_scheduled_run and event.start_time <= last_scheduled_run:
            continue
        else:
            idempotency_key = f"auto-scheduled:{event.start_time.in_tz('UTC')}"

        schedule_coros.append(
            api.runs.create_flow_run(
                flow_id=flow_id,
                scheduled_start_time=event.start_time,
                parameters=event.parameter_defaults,
                labels=event.labels,
                idempotency_key=idempotency_key,
            ))

    # schedule runs concurrently
    run_ids.extend(await asyncio.gather(*schedule_coros))

    await models.FlowRun.where({
        "id": {"_in": run_ids}
    }).update(set={"auto_scheduled": True})

    return run_ids
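A standalone sketch of the idempotency-key construction used above, with hypothetical event values; note that `sorted(json.dumps(...))` orders the characters of the JSON string, which is unusual but deterministic for a given payload:

import hashlib
import json

import pendulum

# hypothetical event values
start_time = pendulum.datetime(2021, 1, 1, tz="UTC")
parameter_defaults = {"x": 1}
labels = ["prod"]

md5 = hashlib.md5()
param_string = str(sorted(json.dumps(parameter_defaults)))
label_string = str(sorted(json.dumps(labels)))
md5.update((param_string + label_string).encode("utf-8"))
print(f"auto-scheduled:{start_time.in_tz('UTC')}:{md5.hexdigest()}")
# auto-scheduled:2021-01-01T00:00:00+00:00:<hash>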
async def run_scheduled(self, flow_id=None): """ Queries for any flow runs that are SCHEDULED, OR any flow runs that have SCHEDULED task runs [if the flow run itself is RUNNING]. Sets all Scheduled runs to Submitted and runs the flow. If a flow_id is provided, only flow runs of that flow are matched. """ now = pendulum.now() flow_runs = await models.FlowRun.where({ "_or": [ { "state_start_time": { "_lte": str(now) } }, { "state": { "_eq": "Running" }, "task_runs": { "state_start_time": { "_lte": str(now) } }, }, ], "flow_id": { "_eq": flow_id }, }).get( selection_set={ "id": True, "version": True, "tenant_id": True, "state": True, "serialized_state": True, "parameters": True, "flow": {"id", "environment", "name", "storage"}, with_args("task_runs", { "where": { "state_start_time": { "_lte": str(now) } } }): {"id", "version", "task_id", "serialized_state"}, }, limit=100, order_by={"state_start_time": EnumValue("asc")}, ) for fr in flow_runs: skip_counter = 0 fr_serialized_state = state_schema.load(fr.serialized_state) # set the flow run state to submitted, if it's scheduled if fr_serialized_state.is_scheduled(): try: await api.states.set_flow_run_state( flow_run_id=fr.id, state=Submitted( message="Submitted for execution", state=fr_serialized_state, ), ) except ValueError as exc: skip_counter += 1 if "Update failed" not in str(exc): raise # set each task run state to submitted, if it's scheduled for tr in fr.task_runs: tr_serialized_state = state_schema.load(tr.serialized_state) try: await api.states.set_task_run_state( task_run_id=tr.id, state=Submitted( message="Submitted for execution", state=tr_serialized_state, ), ) except ValueError as exc: skip_counter += 1 if "Update failed" not in str(exc): raise # none of the states were set, so we shouldn't bother running if skip_counter == 1 + len(fr.task_runs): continue self.logger.info(f'Submitting flow run "{fr.id}" for execution.') # run the flow self.run_flow( flow_name=fr.flow.name, storage=storage_schema.load(fr.flow.storage), environment=environment_schema.load(fr.flow.environment), config={ "cloud.api": f"http://localhost:4200", "cloud.graphql": "http://localhost:4200", "engine.flow_runner.default_class": "prefect.engine.cloud.CloudFlowRunner", "engine.task_runner.default_class": "prefect.engine.cloud.CloudTaskRunner", "engine.executor.default_class": "prefect.engine.executors.LocalExecutor", }, context={"flow_run_id": fr.id}, )
def register_serialized_flow( client: "prefect.Client", serialized_flow: dict, project_id: str, force: bool = False, ) -> Tuple[str, int, bool]: """Register a pre-serialized flow. Args: - client (prefect.Client): the prefect client - serialized_flow (dict): the serialized flow - project_id (str): the project id - force (bool, optional): If `False` (default), an idempotency key will be generated to avoid unnecessary re-registration. Set to `True` to force re-registration. Returns: - flow_id (str): the flow id - flow_version (int): the flow version - is_new (bool): True if this is a new flow version, false if re-registration was skipped. """ # Get most recent flow id for this flow. This can be removed once # the registration graphql routes return more information flow_name = serialized_flow["name"] resp = client.graphql( { "query": { with_args( "flow", { "where": { "_and": { "name": {"_eq": flow_name}, "project": {"id": {"_eq": project_id}}, } }, "order_by": {"version": EnumValue("desc")}, "limit": 1, }, ): {"id", "version"} } } ) if resp.data.flow: prev_id = resp.data.flow[0].id prev_version = resp.data.flow[0].version else: prev_id = None prev_version = 0 inputs = dict( project_id=project_id, serialized_flow=compress(serialized_flow), ) if not force: inputs["idempotency_key"] = hashlib.sha256( json.dumps(serialized_flow, sort_keys=True).encode() ).hexdigest() res = client.graphql( { "mutation($input: create_flow_from_compressed_string_input!)": { "create_flow_from_compressed_string(input: $input)": {"id"} } }, variables=dict(input=inputs), retry_on_api_error=False, ) new_id = res.data.create_flow_from_compressed_string.id if new_id == prev_id: return new_id, prev_version, False else: return new_id, prev_version + 1, True
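A hedged usage sketch for the function above; `flow` is assumed to be a `prefect.Flow` in scope, and the project id is a placeholder:

import prefect

client = prefect.Client()
serialized = flow.serialize(build=False)  # `flow` is assumed to exist
flow_id, version, is_new = register_serialized_flow(
    client=client,
    serialized_flow=serialized,
    project_id="00000000-0000-0000-0000-000000000000",  # placeholder
)
if not is_new:
    print(f"Flow {flow_id} unchanged; still at version {version}")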
def _heartbeat(self) -> bool:
    try:
        flow_run_id = prefect.context.get("flow_run_id")
        self.heartbeat_cmd = [
            sys.executable,
            "-m",
            "prefect",
            "heartbeat",
            "flow-run",
            "-i",
            flow_run_id,
        ]

        query = {
            "query": {
                with_args("flow_run_by_pk", {"id": flow_run_id}): {
                    "flow": {"settings": True},
                }
            }
        }
        flow_run = self.client.graphql(query).data.flow_run_by_pk
        if not flow_run.flow.settings.get("heartbeat_enabled", True):
            return False
        return True
    except Exception:
        self.logger.exception(
            "Heartbeat failed for Flow '{}'".format(self.flow.name)
        )
        return False
def run_flow(self) -> None: """ Run the flow using the default executor Raises: - ValueError: if no `flow_run_id` is found in context """ # Call on_start callback if specified if self.on_start: self.on_start() try: from prefect.engine import ( get_default_flow_runner_class, get_default_executor_class, ) flow_run_id = prefect.context.get("flow_run_id") if not flow_run_id: raise ValueError("No flow run ID found in context.") query = { "query": { with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): { "flow": {"name": True, "storage": True,}, } } } client = Client() result = client.graphql(query) flow_run = result.data.flow_run[0] flow_data = flow_run.flow storage_schema = prefect.serialization.storage.StorageSchema() storage = storage_schema.load(flow_data.storage) ## populate global secrets secrets = prefect.context.get("secrets", {}) for secret in storage.secrets: secrets[secret] = prefect.tasks.secrets.PrefectSecret(name=secret).run() with prefect.context(secrets=secrets): flow = storage.get_flow(storage.flows[flow_data.name]) runner_cls = get_default_flow_runner_class() if getattr(self, "executor", None) is not None: executor = self.executor # type: ignore else: executor_cls = get_default_executor_class() # Deprecated, to be removed if hasattr(self, "executor_kwargs"): executor = executor_cls(**self.executor_kwargs) # type: ignore else: executor = executor_cls runner_cls(flow=flow).run(executor=executor) except Exception as exc: self.logger.exception( "Unexpected error raised during flow run: {}".format(exc) ) raise exc finally: # Call on_exit callback if specified if self.on_exit: self.on_exit()
def logs(name, id, info): """ Query logs for a flow run. Note: at least one of `name` or `id` must be specified. If only `name` is set then the most recent flow run with that name will be queried. \b Options: --name, -n TEXT A flow run name to query --id TEXT A flow run ID to query --info, -i Retrieve detailed logging info """ if not name and not id: click.secho("Either --name or --id must be provided", fg="red") return log_query = { with_args("logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}): { "timestamp": True, "message": True, "level": True, }, "start_time": True, } if info: log_query = { with_args( "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}} ): {"timestamp": True, "info": True}, "start_time": True, } query = { "query": { with_args( "flow_run", { "where": {"name": {"_eq": name}, "id": {"_eq": id}}, "order_by": {EnumValue("start_time"): EnumValue("desc")}, }, ): log_query } } result = Client().graphql(query) flow_run = result.data.flow_run if not flow_run: click.secho("{} not found".format(name), fg="red") return run = flow_run[0] logs = run.logs output = [] if not info: for log in logs: output.append([log.timestamp, log.level, log.message]) click.echo( tabulate( output, headers=["TIMESTAMP", "LEVEL", "MESSAGE"], tablefmt="plain", numalign="left", stralign="left", ) ) return for log in logs: click.echo(log.info)
from prefect import Client
from prefect.utilities.graphql import with_args

c = Client()
name = "my_flow"
c.graphql({"query": {with_args("flow", {"where": {"name": {"_eq": name}}}): "id"}})
# equivalent raw query: query { flow(where: { name: { _eq: "my_flow" } }) { id } }
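The dictionary form can be rendered to the raw query text with `parse_graphql`, the same helper the client uses internally:

from prefect.utilities.graphql import parse_graphql

query = {"query": {with_args("flow", {"where": {"name": {"_eq": name}}}): "id"}}
print(parse_graphql(query))  # prints roughly the query shown in the comment above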
def run(
    self,
    flow_name: str = None,
    project_name: str = None,
    parameters: dict = None,
    run_config: RunConfig = None,
    new_flow_context: dict = None,
    run_name: str = None,
    idempotency_key: str = None,
    scheduled_start_time: datetime.datetime = None,
) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not provided,
            this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is located; if
            not provided, this method will use the project provided at initialization. If
            running with Prefect Core's server as the backend, this should not be provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization
        - run_config (RunConfig, optional): a run-config to use for this flow
            run, overriding any existing flow settings.
        - new_flow_context (dict, optional): the optional run context for the new flow run
        - run_name (str, optional): name to be set for the flow run
        - idempotency_key (str, optional): a unique idempotency key for scheduling the
            flow run. Duplicate flow runs with the same idempotency key will only create
            a single flow run. This is useful for ensuring that only one run is created
            if this task is retried. If not provided, defaults to the active `task_run_id`.
        - scheduled_start_time (datetime, optional): the time to schedule the execution
            for; if not provided, defaults to now

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project name was
            not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import StartFlowRun

        kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
        ```
    """

    # verify that flow and project names were passed where necessary
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None:
        raise ValueError("Must provide a project name.")

    where_clause = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": project_name}},
    }

    # find the flow ID to schedule
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    flow = client.graphql(query).data.flow

    # verify that a flow has been returned
    if not flow:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # grab the ID for the most recent version
    flow_id = flow[0].id

    if idempotency_key is None:
        idempotency_key = context.get("task_run_id", None)

    # providing an idempotency key ensures that retries for this task
    # will not create additional flow runs
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        run_config=run_config,
        idempotency_key=idempotency_key,
        context=new_flow_context,
        run_name=run_name,
        scheduled_start_time=scheduled_start_time,
    )

    self.logger.debug(f"Flow Run {flow_run_id} created.")

    self.logger.debug(f"Creating link artifact for Flow Run {flow_run_id}.")
    run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
    create_link(urlparse(run_link).path)

    if not self.wait:
        return flow_run_id

    while True:
        time.sleep(10)
        flow_run_state = client.get_flow_run_info(flow_run_id).state
        if flow_run_state.is_finished():
            exc = signal_from_state(flow_run_state)(
                f"{flow_run_id} finished in state {flow_run_state}"
            )
            raise exc
async def update(
    self,
    graphql_type: str,
    where: GQLObjectTypes = None,
    id: str = None,
    set: GQLObjectTypes = None,
    increment: GQLObjectTypes = None,
    alias: str = None,
    selection_set: GQLObjectTypes = "affected_rows",
    run_mutation: bool = True,
) -> Box:
    """
    Runs an `update` mutation against the provided Hasura type and `where`
    clause, applying the provided operations (either `set` or `increment`),
    evaluating the provided `selection_set`, and returning the full result.

    The `selection_set` is inserted directly into the graphql query, and should
    not be surrounded by curly braces. Valid top-level keys are `affected_rows`
    and `returning`.
    """
    if id is None and not isinstance(where, dict):
        raise TypeError(
            "`where` must be provided as a dict if `id` is None; "
            f"received {type(where).__name__}")
    elif all(op is None for op in [set, increment]):
        raise ValueError("At least one update operation must be provided")

    where = where or {}
    if id is not None:
        where["id"] = {"_eq": id}

    alias = alias or "update"

    # -------------------------------------------------------------
    # create variables

    arguments = {}
    variables = []

    # --- variable: where

    arguments["where"] = Variable(name=f"{alias}_where",
                                  type=f"{graphql_type}_bool_exp!",
                                  value=where)
    variables.append(arguments["where"])

    # --- variable: _set

    if set:
        arguments["_set"] = Variable(name=f"{alias}_set",
                                     type=f"{graphql_type}_set_input",
                                     value=set)
        variables.append(arguments["_set"])

    # --- variable: _inc

    if increment:
        arguments["_inc"] = Variable(name=f"{alias}_inc",
                                     type=f"{graphql_type}_inc_input",
                                     value=increment)
        variables.append(arguments["_inc"])

    # -------------------------------------------------------------
    # build mutation

    mutation_name = f"{alias}: update_{graphql_type}"
    selection_set = selection_set or "affected_rows"

    graphql = dict(
        query={with_args(mutation_name, arguments): selection_set},
        variables=variables,
    )

    if run_mutation:
        result = await self.execute_mutations_in_transaction(
            mutations=[graphql])
        return result.data[alias]
    else:
        return graphql
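A usage sketch mirroring the `insert` example earlier; `hasura` is a hypothetical instance of the owning client class and the id is a placeholder:

# rename a single row by primary key; `id=` expands to where={"id": {"_eq": ...}}
result = await hasura.update(
    graphql_type="flow_run",
    id="some-flow-run-id",  # placeholder
    set={"name": "renamed-run"},
    selection_set={"affected_rows": True, "returning": {"id"}},
)
print(result.affected_rows)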
def run( self, flow_name: str = None, project_name: str = None, parameters: dict = None, idempotency_key: str = None, new_flow_context: dict = None, run_name: str = None, ) -> str: """ Run method for the task; responsible for scheduling the specified flow run. Args: - flow_name (str, optional): the name of the flow to schedule; if not provided, this method will use the flow name provided at initialization - project_name (str, optional): the Cloud project in which the flow is located; if not provided, this method will use the project provided at initialization. If running with Prefect Core's server as the backend, this should not be provided. - parameters (dict, optional): the parameters to pass to the flow run being scheduled; if not provided, this method will use the parameters provided at initialization - idempotency_key (str, optional): an optional idempotency key for scheduling the flow run; if provided, ensures that only one run is created if this task is retried or rerun with the same inputs. If not provided, the current flow run ID will be used. - new_flow_context (dict, optional): the optional run context for the new flow run - run_name (str, optional): name to be set for the flow run Returns: - str: the ID of the newly-scheduled flow run Raises: - ValueError: if flow was not provided, cannot be found, or if a project name was not provided while using Cloud as a backend Example: ```python from prefect.tasks.prefect.flow_run import FlowRunTask kickoff_task = FlowRunTask(project_name="Hello, World!", flow_name="My Cloud Flow") ``` """ # verify that flow and project names were passed where necessary if flow_name is None: raise ValueError("Must provide a flow name.") if project_name is None: raise ValueError("Must provide a project name.") where_clause = { "name": {"_eq": flow_name}, "archived": {"_eq": False}, "project": {"name": {"_eq": project_name}}, } # find the flow ID to schedule query = { "query": { with_args( "flow", { "where": where_clause, "order_by": {"version": EnumValue("desc")}, "limit": 1, }, ): {"id"} } } client = Client() flow = client.graphql(query).data.flow # verify that a flow has been returned if not flow: raise ValueError("Flow '{}' not found.".format(flow_name)) # grab the ID for the most recent version flow_id = flow[0].id idem_key = None if context.get("flow_run_id"): map_index = context.get("map_index") default = context.get("flow_run_id") + ( f"-{map_index}" if map_index else "" ) idem_key = idempotency_key or default # providing an idempotency key ensures that retries for this task # will not create additional flow runs flow_run_id = client.create_flow_run( flow_id=flow_id, parameters=parameters, idempotency_key=idem_key or idempotency_key, context=new_flow_context, run_name=run_name, ) self.logger.debug(f"Flow Run {flow_run_id} created.") if not self.wait: return flow_run_id while True: time.sleep(10) flow_run_state = client.get_flow_run_info(flow_run_id).state if flow_run_state.is_finished(): exc = signal_from_state(flow_run_state)( f"{flow_run_id} finished in state {flow_run_state}" ) raise exc
def get_logs( self, start_time: pendulum.DateTime = None, end_time: pendulum.DateTime = None, ) -> List["FlowRunLog"]: """ Get logs for this flow run from `start_time` to `end_time`. Args: - start_time (optional): A time to start the log query at, useful for limiting the scope. If not provided, all logs up to `updated_at` are retrieved. - end_time (optional): A time to end the log query at. By default, this is set to `self.updated_at` which is the last time that the flow run was updated in the backend before this object was created. Returns: A list of `FlowRunLog` objects sorted by timestamp """ client = prefect.Client() end_time = end_time or self.updated_at logs_query = { with_args( "logs", { "order_by": { EnumValue("timestamp"): EnumValue("asc") }, "where": { "_and": [ { "timestamp": { "_lte": end_time.isoformat() } }, ({ "timestamp": { "_gt": start_time.isoformat() } } if start_time else {}), ] }, }, ): { "timestamp": True, "message": True, "level": True } } result = client.graphql({ "query": { with_args( "flow_run", { "where": { "id": { "_eq": self.flow_run_id } }, }, ): logs_query } }) # Unpack the result logs = result.get("data", {}).get("flow_run", [{}])[0].get("logs", []) return [FlowRunLog.from_dict(log) for log in logs]
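A usage sketch, assuming this method lives on `prefect.backend.FlowRunView` (the flow run id is a placeholder):

import pendulum
from prefect.backend import FlowRunView

flow_run = FlowRunView.from_flow_run_id("00000000-0000-0000-0000-000000000000")
# fetch only the last hour of logs
for log in flow_run.get_logs(start_time=pendulum.now("utc").subtract(hours=1)):
    print(log.timestamp, log.level, log.message)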
def deploy(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    compressed: bool = True,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to deploy
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the schedule to
            inactive in the database to prevent auto-scheduling runs (if the Flow has a
            schedule). Defaults to `True`. This can be changed later.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`

    Returns:
        - str: the ID of the newly-deployed flow

    Raises:
        - ClientError: if the deploy failed
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        raise ClientError(
            "Flows with required parameters can not be scheduled automatically."
        )
    if compressed:
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }
    query_project = {
        "query": {
            with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                "id": True
            }
        }
    }

    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                project_name, project_name
            )
        )

    serialized_flow = flow.serialize(build=build)  # type: Any

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        input=dict(
            projectId=project[0].id,
            serializedFlow=serialized_flow,
            setScheduleActive=set_schedule_active,
        ),
    )  # type: Any

    flow_id = (
        res.data.createFlowFromCompressedString.id
        if compressed
        else res.data.createFlow.id
    )
    return flow_id
def flow_run(): """ Execute a flow run in the context of a backend API. """ flow_run_id = prefect.context.get("flow_run_id") if not flow_run_id: click.echo("Not currently executing a flow within a Cloud context.") raise Exception( "Not currently executing a flow within a Cloud context.") query = { "query": { with_args("flow_run", {"where": { "id": { "_eq": flow_run_id } }}): { "flow": { "name": True, "storage": True }, "version": True, } } } client = Client() result = client.graphql(query) flow_run = result.data.flow_run if not flow_run: click.echo("Flow run {} not found".format(flow_run_id)) raise ValueError("Flow run {} not found".format(flow_run_id)) try: flow_data = flow_run[0].flow storage_schema = prefect.serialization.storage.StorageSchema() storage = storage_schema.load(flow_data.storage) # populate global secrets secrets = prefect.context.get("secrets", {}) for secret in storage.secrets: secrets[secret] = PrefectSecret(name=secret).run() with prefect.context(secrets=secrets, loading_flow=True): flow = storage.get_flow(storage.flows[flow_data.name]) with prefect.context(secrets=secrets): if getattr(flow, "run_config", None) is not None: runner_cls = get_default_flow_runner_class() runner_cls(flow=flow).run() else: environment = flow.environment environment.setup(flow) environment.execute(flow) except Exception as exc: msg = "Failed to load and execute Flow's environment: {}".format( repr(exc)) state = prefect.engine.state.Failed(message=msg) client.set_flow_run_state(flow_run_id=flow_run_id, state=state) click.echo(str(exc)) raise exc
def run(self, flow_name: str = None, project_name: str = None, parameters: dict = None) -> str: """ Run method for the task; responsible for scheduling the specified flow run. Args: - flow_name (str, optional): the name of the flow to schedule; if not provided, this method will use the flow name provided at initialization - project_name (str, optional): the Cloud project in which the flow is located; if not provided, this method will use the project provided at initialization. If running with Prefect Core's server as the backend, this should not be provided. - parameters (dict, optional): the parameters to pass to the flow run being scheduled; if not provided, this method will use the parameters provided at initialization Returns: - str: the ID of the newly-scheduled flow run Raises: - ValueError: if flow was not provided, cannot be found, or if a project name was not provided while using Cloud as a backend Example: ```python from prefect.tasks.prefect.flow_run import FlowRunTask kickoff_task = FlowRunTask(project_name="Hello, World!", flow_name="My Cloud Flow") ``` """ # verify that flow and project names were passed where necessary if flow_name is None: raise ValueError("Must provide a flow name.") if project_name is None and config.backend == "cloud": raise ValueError("Must provide a project name.") where_clause = { "name": { "_eq": flow_name }, "archived": { "_eq": False }, } if project_name: where_clause["project"] = {"name": {"_eq": project_name}} # find the flow ID to schedule query = { "query": { with_args( "flow", { "where": where_clause, "order_by": { "version": EnumValue("desc") }, "limit": 1, }, ): {"id"} } } client = Client() flow = client.graphql(query).data.flow # verify that a flow has been returned if not flow: raise ValueError("Flow '{}' not found.".format(flow_name)) # grab the ID for the most recent version flow_id = flow[0].id return client.create_flow_run(flow_id=flow_id, parameters=parameters)
def _run_flow( name, version, parameters_file, parameters_string, run_name, watch, logs, no_url, project=None, ): if watch and logs: click.secho( "Streaming state and logs not currently supported together.", fg="red") return where_clause = { "_and": { "name": { "_eq": name }, "version": { "_eq": version }, "project": { "name": { "_eq": project } }, } } query = { "query": { with_args( "flow", { "where": where_clause, "order_by": { "name": EnumValue("asc"), "version": EnumValue("desc"), }, "distinct_on": EnumValue("name"), }, ): { "id": True } } } client = Client() result = client.graphql(query) flow_data = result.data.flow if flow_data: flow_id = flow_data[0].id else: click.secho("{} not found".format(name), fg="red") return # Load parameters from file if provided file_params = {} if parameters_file: with open(parameters_file) as params_file: file_params = json.load(params_file) # Load parameters from string if provided string_params = {} if parameters_string: string_params = json.loads(parameters_string) flow_run_id = client.create_flow_run(flow_id=flow_id, parameters={ **file_params, **string_params }, run_name=run_name) if no_url: click.echo("Flow Run ID: {}".format(flow_run_id)) else: flow_run_url = client.get_cloud_url("flow-run", flow_run_id) click.echo("Flow Run: {}".format(flow_run_url)) if watch: current_states = [] while True: query = { "query": { with_args("flow_run_by_pk", {"id": flow_run_id}): { with_args( "states", { "order_by": { EnumValue("timestamp"): EnumValue("asc") } }, ): { "state": True, "timestamp": True } } } } result = client.graphql(query) # Filter through retrieved states and output in order for state_index in result.data.flow_run_by_pk.states: state = state_index.state if state not in current_states: if state != "Success" and state != "Failed": click.echo("{} -> ".format(state), nl=False) else: click.echo(state) return flow_run_id current_states.append(state) time.sleep(3) if logs: all_logs = [] log_query = { with_args("logs", { "order_by": { EnumValue("timestamp"): EnumValue("asc") } }): { "timestamp": True, "message": True, "level": True }, "start_time": True, "state": True, } query = { "query": { with_args( "flow_run", { "where": { "id": { "_eq": flow_run_id } }, "order_by": { EnumValue("start_time"): EnumValue("desc") }, }, ): log_query } } while True: result = client.graphql(query) flow_run = result.data.flow_run if not flow_run: click.secho("{} not found".format(flow_run_id), fg="red") return new_run = flow_run[0] logs = new_run.logs output = [] for i in logs: if [i.timestamp, i.level, i.message] not in all_logs: if not len(all_logs): click.echo( tabulate( [[i.timestamp, i.level, i.message]], headers=["TIMESTAMP", "LEVEL", "MESSAGE"], tablefmt="plain", numalign="left", stralign="left", )) all_logs.append([i.timestamp, i.level, i.message]) continue output.append([i.timestamp, i.level, i.message]) all_logs.append([i.timestamp, i.level, i.message]) if output: click.echo( tabulate(output, tablefmt="plain", numalign="left", stralign="left")) if new_run.state == "Success" or new_run.state == "Failed": return flow_run_id time.sleep(3) return flow_run_id
def cloud(
    name,
    project,
    version,
    parameters_file,
    parameters_string,
    run_name,
    watch,
    logs,
    no_url,
):
    """
    Run a registered flow in Prefect Cloud.

    \b
    Options:
        --name, -n                  TEXT        The name of a flow to run [required]
        --project, -p               TEXT        The name of a project that contains the flow [required]
        --version, -v               INTEGER     A flow version to run
        --parameters-file, -pf      FILE PATH   A filepath of a JSON file containing parameters
        --parameters-string, -ps    TEXT        A string of JSON parameters
        --run-name, -rn             TEXT        A name to assign for this run
        --watch, -w                             Watch current state of the flow run, stream output to stdout
        --logs, -l                              Get logs of the flow run, stream output to stdout
        --no-url                                Only output the flow run id instead of a link

    \b
    If both `--parameters-file` and `--parameters-string` are provided then the values
    passed in through the string will override the values provided from the file.

    \b
    e.g.
    File contains:  {"a": 1, "b": 2}
    String:         '{"a": 3}'
    Parameters passed to the flow run: {"a": 3, "b": 2}
    """
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.",
            fg="red")
        return

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {"_eq": name},
                            "version": {"_eq": version},
                            "project": {"name": {"_eq": project}},
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    flow_run_id = client.create_flow_run(flow_id=flow_id,
                                         parameters={
                                             **file_params,
                                             **string_params
                                         },
                                         run_name=run_name)

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        # Generate direct link to Cloud run
        tenant_slug = client.get_default_tenant_slug()
        url = (re.sub("api-", "", config.cloud.api)
               if re.search("api-", config.cloud.api) else re.sub(
                   "api", "cloud", config.cloud.api))
        flow_run_url = "/".join(
            [url.rstrip("/"), tenant_slug, "flow-run", flow_run_id])
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args("logs", {
                "order_by": {EnumValue("timestamp"): EnumValue("asc")}
            }): {
                "timestamp": True,
                "message": True,
                "level": True
            },
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:
                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            ))
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output,
                             tablefmt="plain",
                             numalign="left",
                             stralign="left"))

            # Check if state is either Success or Failed, exit if it is
            pk_query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        "state": True
                    }
                }
            }
            result = client.graphql(pk_query)

            if (result.data.flow_run_by_pk.state == "Success"
                    or result.data.flow_run_by_pk.state == "Failed"):
                return

            time.sleep(3)
def run_flow(self) -> None: """ Run the flow using a Dask executor """ # Call on_start callback if specified if self.on_start: self.on_start() try: from prefect.engine import get_default_flow_runner_class from prefect.engine.executors import DaskExecutor from dask_kubernetes import KubeCluster if self._worker_spec: worker_pod = self._worker_spec worker_pod = self._populate_worker_spec_yaml( yaml_obj=worker_pod) else: with open(path.join(path.dirname(__file__), "worker_pod.yaml")) as pod_file: worker_pod = yaml.safe_load(pod_file) worker_pod = self._populate_worker_pod_yaml( yaml_obj=worker_pod) cluster = KubeCluster.from_dict( worker_pod, namespace=prefect.context.get("namespace")) cluster.adapt(minimum=self.min_workers, maximum=self.max_workers) flow_run_id = prefect.context.get("flow_run_id") if not flow_run_id: raise ValueError("No flow run ID found in context.") query = { "query": { with_args("flow_run", { "where": { "id": { "_eq": flow_run_id } } }): { "flow": { "name": True, "storage": True, }, } } } client = Client() result = client.graphql(query) flow_run = result.data.flow_run[0] flow_data = flow_run.flow storage_schema = prefect.serialization.storage.StorageSchema() storage = storage_schema.load(flow_data.storage) ## populate global secrets secrets = prefect.context.get("secrets", {}) for secret in storage.secrets: secrets[secret] = prefect.tasks.secrets.PrefectSecret( name=secret).run() with prefect.context(secrets=secrets): flow = storage.get_flow(storage.flows[flow_data.name]) executor = DaskExecutor(address=cluster.scheduler_address) runner_cls = get_default_flow_runner_class() runner_cls(flow=flow).run(executor=executor) except Exception as exc: self.logger.exception( "Unexpected error raised during flow run: {}".format(exc)) raise exc finally: # Call on_exit callback if specified if self.on_exit: self.on_exit()
def query_flow_runs(self) -> list:
    """
    Query Prefect Cloud for flow runs which need to be deployed and executed

    Returns:
        - list: A list of GraphQLResult flow run objects
    """
    self.logger.debug("Querying for flow runs")
    # keep a copy of what was currently running before the query (future callbacks may be
    # updating this set)
    currently_submitting_flow_runs = self.submitting_flow_runs.copy()

    # Get scheduled flow runs from queue
    mutation = {
        "mutation($input: get_runs_in_queue_input!)": {
            "get_runs_in_queue(input: $input)": {"flow_run_ids"}
        }
    }

    now = pendulum.now("UTC")
    result = self.client.graphql(
        mutation,
        variables={
            "input": {
                "before": now.isoformat(),
                "labels": list(self.labels),
                "tenant_id": self.client._active_tenant_id,
            }
        },
    )

    # we queried all of the available flow runs, however, some may have already been pulled
    # by this agent and are in the process of being submitted in the background. We do not
    # want to act on these "duplicate" flow runs until we've been assured that the background
    # thread has attempted to submit the work (successful or otherwise).
    flow_run_ids = set(
        result.data.get_runs_in_queue.flow_run_ids)  # type: ignore

    if flow_run_ids:
        msg = "Found flow runs {}".format(
            result.data.get_runs_in_queue.flow_run_ids)
    else:
        msg = "No flow runs found"

    already_submitting = flow_run_ids & currently_submitting_flow_runs
    target_flow_run_ids = flow_run_ids - already_submitting

    if already_submitting:
        msg += " ({} already submitting: {})".format(
            len(already_submitting), list(already_submitting))

    self.logger.debug(msg)

    # Query metadata for flow runs found in queue
    query = {
        "query": {
            with_args(
                "flow_run",
                {
                    # match flow runs in the flow_run_ids list
                    "where": {
                        "id": {"_in": list(target_flow_run_ids)},
                        "_or": [
                            # who are EITHER scheduled...
                            {"state": {"_eq": "Scheduled"}},
                            # OR running with task runs scheduled to start more than 3
                            # seconds ago
                            {
                                "state": {"_eq": "Running"},
                                "task_runs": {
                                    "state_start_time": {
                                        "_lte": str(now.subtract(seconds=3))  # type: ignore
                                    }
                                },
                            },
                        ],
                    },
                },
            ): {
                "id": True,
                "version": True,
                "state": True,
                "serialized_state": True,
                "parameters": True,
                "scheduled_start_time": True,
                "flow": {
                    "id",
                    "name",
                    "environment",
                    "run_config",
                    "storage",
                    "version",
                    "core_version",
                },
                with_args(
                    "task_runs",
                    {
                        "where": {
                            "state_start_time": {
                                "_lte": str(now.subtract(seconds=3))  # type: ignore
                            }
                        }
                    },
                ): {"id", "version", "task_id", "serialized_state"},
            }
        }
    }

    if target_flow_run_ids:
        self.logger.debug("Querying flow run metadata")
        result = self.client.graphql(query)
        # Return flow runs sorted by scheduled start time
        return sorted(result.data.flow_run,
                      key=lambda flow_run: flow_run.scheduled_start_time)
    else:
        return []
async def reap_zombie_task_runs(self,
                                heartbeat_cutoff: datetime.datetime = None
                                ) -> int:
    """
    Zombie tasks are tasks that claim to be Running, but haven't updated their
    heartbeat. This method either retries them or marks them as failed.

    Returns:
        - int: the number of zombie task runs that were handled
    """
    zombies = 0
    heartbeat_cutoff = heartbeat_cutoff or pendulum.now("utc").subtract(
        minutes=10)

    where_clause = await self.get_task_runs_where_clause(
        heartbeat_cutoff=heartbeat_cutoff)

    task_runs = await models.TaskRun.where(where_clause).get(
        selection_set={
            "id": True,
            "flow_run_id": True,
            "tenant_id": True,
            # Information about the current flow run state
            "flow_run": {"state"},
            # get information about retries from task
            "task": {"max_retries", "retry_delay"},
            # count the number of retrying states for this task run
            with_args(
                "retry_count: states_aggregate",
                {"where": {"state": {"_eq": "Retrying"}}},
            ): {"aggregate": {"count"}},
        },
        limit=5000,
        order_by={"updated": EnumValue("desc")},
        apply_schema=False,
    )

    if task_runs:
        self.logger.info(f"Zombie killer found {len(task_runs)} task runs.")

    # Set task run states to failed
    for tr in task_runs:
        try:
            # if the flow run is running and retries are available, mark as retrying
            if (tr.flow_run.state == "Running" and
                    tr.retry_count.aggregate.count < (tr.task.max_retries or 0)):
                message = (
                    "No heartbeat detected from the remote task; retrying the run. "
                    f"This will be retry {tr.retry_count.aggregate.count + 1} of {tr.task.max_retries}."
                )
                retry_delay = orm._as_timedelta(tr.task.retry_delay or "0")
                await prefect.api.states.set_task_run_state(
                    task_run_id=tr.id,
                    state=Retrying(
                        message=message,
                        run_count=tr.retry_count.aggregate.count + 1,
                        start_time=pendulum.now("UTC") + retry_delay,
                    ),
                )

            # mark failed
            else:
                message = "No heartbeat detected from the remote task; marking the run as failed."
                await prefect.api.states.set_task_run_state(
                    task_run_id=tr.id,
                    state=Failed(message=message),
                )

            # log the state change to the task run
            await prefect.api.logs.create_logs(
                [
                    dict(
                        tenant_id=tr.tenant_id,
                        flow_run_id=tr.flow_run_id,
                        task_run_id=tr.id,
                        name=f"{self.logger.name}.TaskRun",
                        message=message,
                        level="ERROR",
                    )
                ],
                defer_db_write=False,
            )

            zombies += 1

        except ValueError as exc:
            self.logger.error(exc)

    if zombies:
        self.logger.info(f"Addressed {zombies} zombie task runs.")

    return zombies
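The `retry_count: states_aggregate` key above leans on GraphQL aliasing; a small sketch of how `with_args` renders that aliased aggregate:

from prefect.utilities.graphql import parse_graphql, with_args

selection = {
    with_args(
        "retry_count: states_aggregate",
        {"where": {"state": {"_eq": "Retrying"}}},
    ): {"aggregate": {"count"}}
}
print(parse_graphql(selection))
# roughly:
# retry_count: states_aggregate(where: { state: { _eq: "Retrying" } }) {
#     aggregate {
#         count
#     }
# }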
def tasks(name, flow_name, flow_version, project, limit): """ Query information regarding your Prefect tasks. \b Options: --name, -n TEXT A task name to query --flow-name, -fn TEXT A flow name to query --flow-version, -fv INTEGER A flow version to query --project, -p TEXT The name of a project to query --limit, -l INTEGER A limit amount of tasks to query, defaults to 10 """ where_clause = { "_and": { "name": { "_eq": name }, "flow": { "name": { "_eq": flow_name }, "version": { "_eq": flow_version }, }, } } if project: where_clause["_and"]["flow"]["project"] = {"name": {"_eq": project}} query = { "query": { with_args( "task", { "where": where_clause, "limit": limit, "order_by": { "created": EnumValue("desc") }, }, ): { "name": True, "created": True, "flow": { "name": True, "version": True }, "mapped": True, "type": True, } } } result = Client().graphql(query) task_data = result.data.task output = [] for item in task_data: output.append([ item.name, item.flow.name, item.flow.version, pendulum.parse(item.created).diff_for_humans(), item.mapped, item.type, ]) click.echo( tabulate( output, headers=[ "NAME", "FLOW NAME", "FLOW VERSION", "AGE", "MAPPED", "TYPE" ], tablefmt="plain", numalign="left", stralign="left", ))
def cloud(name, project, version, watch): """ Run a deployed flow in Prefect Cloud. \b Options: --name, -n TEXT The name of a flow to run [required] --project, -p TEXT The name of a project that contains the flow [required] --version, -v INTEGER A flow version to run --watch, -w Watch current state of the flow run, stream output to stdout """ query = { "query": { with_args( "flow", { "where": { "_and": { "name": { "_eq": name }, "version": { "_eq": version }, "project": { "name": { "_eq": project } }, } }, "order_by": { "name": EnumValue("asc"), "version": EnumValue("desc"), }, "distinct_on": EnumValue("name"), }, ): { "id": True } } } client = Client() result = client.graphql(query) flow_data = result.data.flow if flow_data: flow_id = flow_data[0].id else: click.secho("{} not found".format(name), fg="red") return flow_run_id = client.create_flow_run(flow_id=flow_id) click.echo("Flow Run ID: {}".format(flow_run_id)) # TODO: Convert to using a subscription and make output prettier if watch: current_state = "" while True: query = { "query": { with_args("flow_run_by_pk", {"id": flow_run_id}): { "state": True } } } result = client.graphql(query) if result.data.flow_run_by_pk.state != current_state: current_state = result.data.flow_run_by_pk.state if current_state != "Success" and current_state != "Failed": click.echo("{} -> ".format(current_state), nl=False) else: click.echo(current_state) break time.sleep(3)
def flows(name, version, project, limit, all_versions):
    """
    Query information regarding your Prefect flows.

    \b
    Options:
        --name, -n      TEXT    A flow name to query
        --version, -v   TEXT    A flow version to query
        --project, -p   TEXT    The name of a project to query
        --limit, -l     INTEGER A limit amount of flows to query, defaults to 10
        --all-versions          Output all versions of a flow, default shows most recent
    """

    distinct_on = EnumValue("name")
    if all_versions:
        distinct_on = None

    where_clause = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
        }
    }

    query_results = {
        "name": True,
        "version": True,
        "created": True,
    }

    headers = ["NAME", "VERSION", "AGE"]

    if project:
        where_clause["_and"]["project"] = {"name": {"_eq": project}}
        query_results["project"] = {"name": True}
        headers.append("PROJECT NAME")

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": distinct_on,
                    "limit": limit,
                },
            ): query_results
        }
    }

    result = Client().graphql(query)

    flow_data = result.data.flow

    output = []
    for item in flow_data:
        result_output = [
            item.name,
            item.version,
            pendulum.parse(item.created).diff_for_humans(),
        ]

        if project:
            result_output.append(item.project.name)

        output.append(result_output)

    click.echo(
        tabulate(
            output,
            headers=headers,
            tablefmt="plain",
            numalign="left",
            stralign="left",
        ))
def query_flow_runs(self, tenant_id: str) -> list:
    """
    Query Prefect Cloud for flow runs which need to be deployed and executed

    Args:
        - tenant_id (str): The tenant id to use in the query

    Returns:
        - list: A list of GraphQLResult flow run objects
    """
    self.logger.debug("Querying for flow runs")

    # Get scheduled flow runs from queue
    mutation = {
        "mutation($input: getRunsInQueueInput!)": {
            "getRunsInQueue(input: $input)": {"flow_run_ids"}
        }
    }

    now = pendulum.now("UTC")
    result = self.client.graphql(
        mutation,
        variables={"input": {"tenantId": tenant_id, "before": now.isoformat()}},
    )
    flow_run_ids = result.data.getRunsInQueue.flow_run_ids  # type: ignore
    self.logger.debug("Found flow runs {}".format(flow_run_ids))

    # Query metadata for flow runs found in queue
    query = {
        "query": {
            with_args(
                "flow_run",
                {
                    # match flow runs in the flow_run_ids list
                    "where": {
                        "id": {"_in": flow_run_ids},
                        "_or": [
                            # who are EITHER scheduled...
                            {"state": {"_eq": "Scheduled"}},
                            # OR running with task runs scheduled to start more than 3 seconds ago
                            {
                                "state": {"_eq": "Running"},
                                "task_runs": {
                                    "state_start_time": {
                                        "_lte": str(now.subtract(seconds=3))
                                    }
                                },
                            },
                        ],
                    }
                },
            ): {
                "id": True,
                "version": True,
                "tenant_id": True,
                "state": True,
                "serialized_state": True,
                "parameters": True,
                "flow": {"id", "name", "environment", "storage"},
                with_args(
                    "task_runs",
                    {
                        "where": {
                            "state_start_time": {
                                "_lte": str(now.subtract(seconds=3))
                            }
                        }
                    },
                ): {"id", "version", "task_id", "serialized_state"},
            }
        }
    }

    self.logger.debug("Querying flow run metadata")
    result = self.client.graphql(query)
    return result.data.flow_run  # type: ignore
def get_flow_run_info(self, flow_run_id: str) -> FlowRunInfoResult:
    """
    Retrieves version and current state information for the given flow run.

    Args:
        - flow_run_id (str): the id of the flow run to get information for

    Returns:
        - FlowRunInfoResult: an object representing information about the flow run

    Raises:
        - ClientError: if the GraphQL query is bad for any reason
    """
    query = {
        "query": {
            with_args("flow_run_by_pk", {"id": flow_run_id}): {
                "id": True,
                "name": True,
                "flow_id": True,
                "parameters": True,
                "context": True,
                "version": True,
                "scheduled_start_time": True,
                "serialized_state": True,
                # load all task runs except dynamic task runs
                with_args("task_runs",
                          {"where": {"map_index": {"_eq": -1}}}): {
                    "id": True,
                    "task": {"id": True, "slug": True},
                    "version": True,
                    "serialized_state": True,
                },
            }
        }
    }
    result = self.graphql(query).data.flow_run_by_pk  # type: ignore

    if result is None:
        raise ClientError('Flow run ID not found: "{}"'.format(flow_run_id))

    # convert scheduled_start_time from string to datetime
    result.scheduled_start_time = pendulum.parse(result.scheduled_start_time)

    # create "state" attribute from serialized_state
    result.state = prefect.engine.state.State.deserialize(
        result.pop("serialized_state"))

    # reformat task_runs
    task_runs = []
    for tr in result.task_runs:
        tr.state = prefect.engine.state.State.deserialize(
            tr.pop("serialized_state"))
        task_info = tr.pop("task")
        tr.task_id = task_info["id"]
        tr.task_slug = task_info["slug"]
        task_runs.append(TaskRunInfoResult(**tr))

    result.task_runs = task_runs
    result.context = (result.context.to_dict()
                      if result.context is not None else None)
    result.parameters = (result.parameters.to_dict()
                         if result.parameters is not None else None)
    return FlowRunInfoResult(**result)
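A short usage sketch of this client method; the flow run id is a placeholder:

from prefect import Client

client = Client()
info = client.get_flow_run_info("00000000-0000-0000-0000-000000000000")
print(info.state)  # a deserialized prefect.engine.state.State
for task_run in info.task_runs:
    print(task_run.task_slug, task_run.state)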
def flows(name, version, project, output): """ Describe a Prefect flow. \b Options: --name, -n TEXT A flow name to query [required] --version, -v INTEGER A flow version to query --project, -p TEXT The name of a project to query --output, -o TEXT Output style, currently supports `json`. Defaults to Python dictionary format. """ where_clause = { "_and": { "name": { "_eq": name }, "version": { "_eq": version }, } } query_results = { "name": True, "version": True, "created": True, "description": True, "parameters": True, "archived": True, "storage": True, "environment": True, } if project: where_clause["_and"]["project"] = {"name": {"_eq": project}} query_results["project"] = {"name": True} query = { "query": { with_args( "flow", { "where": where_clause, "order_by": { "name": EnumValue("asc"), "version": EnumValue("desc"), }, "distinct_on": EnumValue("name"), }, ): query_results } } result = Client().graphql(query) flow_data = result.data.flow if flow_data: if output == "json": click.echo(json.dumps(flow_data[0])) else: click.echo(flow_data[0]) else: click.secho("{} not found".format(name), fg="red")