def flows(name, version, project):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Match on flow name, flow version and owning project name together.
    flow_filter = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    # Ordered by name, then by descending version, de-duplicated on name.
    flow_args = {
        "where": flow_filter,
        "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
        "distinct_on": EnumValue("name"),
    }
    # Fields requested for each matching flow.
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }

    response = Client().graphql({"query": {with_args("flow", flow_args): selection}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Only the first match is shown.
    click.echo(matches[0])
async def test_traverse_upstream_with_where_clause(self, flow_id):
    """Upstream traversal from `t5` applies the `where` filter on slug and depth."""
    start_task = await models.Task.where(
        {"flow_id": {"_eq": flow_id}, "slug": {"_eq": "t5"}}
    ).first({"id"})

    traversal_args = {
        "args": {"start_task_ids": LiteralSetValue([start_task.id])},
        "order_by": {
            "depth": EnumValue("asc"),
            "task": {"slug": EnumValue("asc")},
        },
        # exclude task `t4` and anything found at depth 4
        "where": {
            "task": {"slug": {"_neq": "t4"}},
            "depth": {"_neq": 4},
        },
    }
    query = {
        "query": {
            with_args("utility_upstream_tasks", traversal_args): {
                "task": {"slug"},
                "depth": True,
            }
        }
    }
    result = await prefect.plugins.hasura.client.execute(query)

    expected = [
        {"task": {"slug": "t5"}, "depth": 0},
        {"task": {"slug": "t9"}, "depth": 1},
        {"task": {"slug": "t3"}, "depth": 2},
        {"task": {"slug": "t8"}, "depth": 2},
        {"task": {"slug": "t7"}, "depth": 3},
        {"task": {"slug": "t1"}, "depth": 5},
    ]
    assert result.data.utility_upstream_tasks == expected
def get_logs(
    self,
    start_time: pendulum.DateTime = None,
    end_time: pendulum.DateTime = None,
) -> List["FlowRunLog"]:
    """
    Get logs for this flow run from `start_time` to `end_time`.

    Args:
        - start_time (optional): A time to start the log query at, useful for
            limiting the scope. If not provided, all logs up to `updated_at` are
            retrieved.
        - end_time (optional): A time to end the log query at. By default, this is
            set to `self.updated_at` which is the last time that the flow run was
            updated in the backend before this object was created.

    Returns:
        A list of `FlowRunLog` objects sorted by timestamp; the list is empty when
        the backend returns no matching flow run or no logs
    """
    client = prefect.Client()
    end_time = end_time or self.updated_at

    # Upper bound is inclusive, lower bound (when given) is exclusive. Without a
    # `start_time` an empty filter object is sent in its place.
    lower_bound = (
        {"timestamp": {"_gt": start_time.isoformat()}} if start_time else {}
    )
    logs_query = {
        with_args(
            "logs",
            {
                "order_by": {EnumValue("timestamp"): EnumValue("asc")},
                "where": {
                    "_and": [
                        {"timestamp": {"_lte": end_time.isoformat()}},
                        lower_bound,
                    ]
                },
            },
        ): {"timestamp": True, "message": True, "level": True}
    }

    result = client.graphql(
        {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": self.flow_run_id}},
                    },
                ): logs_query
            }
        }
    )

    # Unpack the result. An empty (or missing) `flow_run` list means nothing
    # matched; treat that as "no logs" rather than indexing into an empty list.
    flow_runs = result.get("data", {}).get("flow_run") or [{}]
    logs = flow_runs[0].get("logs", [])
    return [FlowRunLog.from_dict(log) for log in logs]
async def test_traverse_downstream_with_limit(self, flow_id):
    """Downstream traversal from `t1` stops at the requested `depth_limit`."""
    start_task = await models.Task.where(
        {"flow_id": {"_eq": flow_id}, "slug": {"_eq": "t1"}}
    ).first({"id"})

    traversal_args = {
        "args": {
            "start_task_ids": LiteralSetValue([start_task.id]),
            "depth_limit": 2,
        },
        "order_by": {
            "depth": EnumValue("asc"),
            "task": {"slug": EnumValue("asc")},
        },
    }
    query = {
        "query": {
            with_args("utility_downstream_tasks", traversal_args): {
                "task": {"slug"},
                "depth": True,
            }
        }
    }
    result = await hasura.HasuraClient().execute(query)

    expected = [
        {"task": {"slug": "t1"}, "depth": 0},
        {"task": {"slug": "t2"}, "depth": 1},
        {"task": {"slug": "t3"}, "depth": 2},
        {"task": {"slug": "t7"}, "depth": 2},
    ]
    assert result.data.utility_downstream_tasks == expected
def projects(name):
    """
    Query information regarding your Prefect projects.

    \b
    Options:
        --name, -n      TEXT    A project name to query
    """
    project_args = {
        "where": {"_and": {"name": {"_eq": name}}},
        "order_by": {"name": EnumValue("asc")},
    }
    # Aggregate count over the project's flows, distinct on flow name.
    flow_count_key = with_args("flows_aggregate", {"distinct_on": EnumValue("name")})
    selection = {
        "name": True,
        "created": True,
        "description": True,
        flow_count_key: {EnumValue("aggregate"): EnumValue("count")},
    }

    response = Client().graphql(
        {"query": {with_args("project", project_args): selection}}
    )

    # One table row per project returned by the backend.
    rows = [
        [
            item.name,
            item.flows_aggregate.aggregate.count,
            pendulum.parse(item.created).diff_for_humans(),
            item.description,
        ]
        for item in response.data.project
    ]

    click.echo(
        tabulate(
            rows,
            headers=["NAME", "FLOW COUNT", "AGE", "DESCRIPTION"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        )
    )
def tasks(name, version, project):
    """
    Describe tasks from a Prefect flow. This command is similar to `prefect describe flow`
    but instead of flow metadata it outputs task metadata.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    flow_args = {
        "where": {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        },
        "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
        "distinct_on": EnumValue("name"),
    }
    # Only the tasks of the flow are requested, not the flow's own metadata.
    task_fields = {
        "name": True,
        "created": True,
        "slug": True,
        "description": True,
        "type": True,
        "max_retries": True,
        "retry_delay": True,
        "mapped": True,
    }

    response = Client().graphql(
        {"query": {with_args("flow", flow_args): {"tasks": task_fields}}}
    )
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    found_tasks = matches[0].tasks
    if not found_tasks:
        click.secho("No tasks found for flow {}".format(name), fg="red")
        return

    for item in found_tasks:
        click.echo(item)
def flows(name, version, project, output):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
        --output, -o    TEXT    Output format, one of {'json', 'yaml'}.
                                Defaults to json.
    """
    # Arguments of the `flow` query: filter, ordering and de-duplication on name.
    flow_args = {
        "where": {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        },
        "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
        "distinct_on": EnumValue("name"),
    }
    # Fields requested for each matching flow.
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }

    response = Client().graphql({"query": {with_args("flow", flow_args): selection}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Render the first match in the requested output format.
    display_output(matches[0].to_dict(), output=output)
async def test_schedule_runs_gives_preference_to_flow_group_schedule(
    self, flow_id, flow_group_id
):
    """Scheduling should follow the flow group's schedule, not the flow's own."""

    def cron_schedule(cron):
        # serialized schedule holding a single cron clock
        return dict(type="Schedule", clocks=[{"type": "CronClock", "cron": cron}])

    # flow group schedule: midnight on the first day of every month
    await models.FlowGroup.where(id=flow_group_id).update(
        set=dict(schedule=cron_schedule("0 0 1 * *"))
    )
    # flow schedule: every minute
    await models.Flow.where(id=flow_id).update(
        set=dict(schedule=cron_schedule("* * * * *"))
    )
    await models.Flow.where(id=flow_id).update(set=dict(is_schedule_active=True))

    # remove existing runs for this flow before scheduling new ones
    await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).delete()
    await api.flows.schedule_flow_runs(flow_id)

    # latest run first
    flow_runs = await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).get(
        selection_set={"scheduled_start_time"},
        order_by={"scheduled_start_time": EnumValue("desc")},
    )

    # 10 runs were created and the latest is further out than a per-minute
    # schedule would have produced
    assert len(flow_runs) == 10
    latest_start = flow_runs[0].scheduled_start_time
    assert latest_start > pendulum.now("utc").add(minutes=15)
async def test_get_or_create_mapped_children_creates_children(
    self, flow_id, flow_run_id
):
    """Requesting mapped children up to index 10 creates 11 ordered task runs."""
    # pick a task from the flow and record how many runs it already has
    task = await models.Task.where({"flow_id": {"_eq": flow_id}}).first({"id"})
    runs_before = await models.TaskRun.where({"task_id": {"_eq": task.id}}).get()

    mapped_children = await api.runs.get_or_create_mapped_task_run_children(
        flow_run_id=flow_run_id, task_id=task.id, max_map_index=10
    )

    # indices 0 through 10 -> 11 children returned
    assert len(mapped_children) == 11

    # and the database holds 11 more task runs than before
    assert len(runs_before) + 11 == len(
        await models.TaskRun.where({"task_id": {"_eq": task.id}}).get()
    )

    # every child has at least one state, and map indices come back in order
    map_indices = []
    for child_id in mapped_children:
        child_run = await models.TaskRun.where(id=child_id).first(
            {
                "map_index": True,
                with_args(
                    "states",
                    {"order_by": {"version": EnumValue("desc")}, "limit": 1},
                ): {"id"},
            }
        )
        map_indices.append(child_run.map_index)
        assert child_run.states[0] is not None

    assert map_indices == sorted(map_indices)
def set_flow_run_state(
    self, flow_run_id: str, version: int, state: "prefect.engine.state.State"
) -> None:
    """
    Sets new state for a flow run in the database.

    Args:
        - flow_run_id (str): the id of the flow run to set state for
        - version (int): the current version of the flow run state
        - state (State): the new state for this flow run

    Raises:
        - ClientError: if the GraphQL mutation is bad for any reason
    """
    # The state is referenced through the `$state` GraphQL variable declared in
    # the mutation header; its serialized value is supplied with the request.
    mutation_input = {
        "flowRunId": flow_run_id,
        "version": version,
        "state": EnumValue("$state"),
    }
    mutation = {
        "mutation($state: JSON!)": {
            with_args("setFlowRunState", {"input": mutation_input}): {"id"}
        }
    }

    self.graphql(mutation, state=state.serialize())  # type: Any
async def test_schedule_creates_parametrized_flow_runs(self, project_id):
    """Runs scheduled from two interleaved clocks carry each clock's parameters."""
    two_minutes = datetime.timedelta(minutes=2)
    # the "a" clock starts one minute after the "b" clock; both tick every 2 minutes
    clock_a = prefect.schedules.clocks.IntervalClock(
        start_date=pendulum.now("UTC").add(minutes=1),
        interval=two_minutes,
        parameter_defaults=dict(x="a"),
    )
    clock_b = prefect.schedules.clocks.IntervalClock(
        start_date=pendulum.now("UTC"),
        interval=two_minutes,
        parameter_defaults=dict(x="b"),
    )

    flow = prefect.Flow(
        name="Test Scheduled Flow",
        schedule=prefect.schedules.Schedule(clocks=[clock_a, clock_b]),
    )
    flow.add_task(prefect.Parameter("x", default=1))

    flow_id = await api.flows.create_flow(
        project_id=project_id, serialized_flow=flow.serialize()
    )

    # clear any runs created alongside the flow, then schedule afresh
    await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).delete()
    scheduled_ids = await api.flows.schedule_flow_runs(flow_id)
    assert len(scheduled_ids) == 10

    flow_runs = await models.FlowRun.where({"flow_id": {"_eq": flow_id}}).get(
        selection_set={"parameters": True, "scheduled_start_time": True},
        order_by={"scheduled_start_time": EnumValue("asc")},
    )

    # in start-time order the runs alternate between the two parameter sets
    even_runs = flow_runs[::2]
    odd_runs = flow_runs[1::2]
    assert all(run.parameters == dict(x="a") for run in even_runs)
    assert all(run.parameters == dict(x="b") for run in odd_runs)
def get_latest_cached_states(
    self, task_id: str, cache_key: Optional[str], created_after: datetime.datetime
) -> List["prefect.engine.state.State"]:
    """
    Pulls all Cached states for the given task that were created after the provided date.

    Args:
        - task_id (str): the task id for this task run
        - cache_key (Optional[str]): the cache key for this Task's cache; if `None`, the
            task id alone will be used
        - created_after (datetime.datetime): the earliest date the state should have been
            created at

    Returns:
        - List[State]: a list of Cached states created after the given date
    """
    # A task run qualifies when it is in the `Cached` state, matches either the
    # cache key or the task id, and its state timestamp is not older than
    # `created_after`. Results are ordered by descending state timestamp.
    task_run_args = {
        "where": {
            "state": {"_eq": "Cached"},
            "_or": [
                {"cache_key": {"_eq": cache_key}},
                {"task_id": {"_eq": task_id}},
            ],
            "state_timestamp": {"_gte": created_after.isoformat()},
        },
        "order_by": {"state_timestamp": EnumValue("desc")},
    }
    query = {"query": {with_args("task_run", task_run_args): "serialized_state"}}

    result = self.graphql(query)  # type: Any

    deserialize = prefect.engine.state.State.deserialize
    return [deserialize(row.serialized_state) for row in result.data.task_run]
async def resolve_create_flow(obj: Any, info: GraphQLResolveInfo, input: dict) -> dict:
    """
    Create a flow from the mutation input and archive the other unarchived
    versions in its version group.

    Args:
        - obj (Any): unused by this resolver
        - info (GraphQLResolveInfo): unused by this resolver
        - input (dict): the mutation input; `serialized_flow` and `project_id` are
            required keys, while `version_group_id`, `set_schedule_active`
            (default `True`), `description` and `idempotency_key` are optional

    Returns:
        - dict: `{"id": <id of the created flow>}`

    Raises:
        - KeyError: if `serialized_flow` or `project_id` is missing from `input`
        - ValueError: if `project_id` is `None`
    """
    serialized_flow = input["serialized_flow"]
    project_id = input["project_id"]
    version_group_id = input.get("version_group_id", None)
    set_schedule_active = input.get("set_schedule_active", True)
    description = input.get("description", None)
    idempotency_key = input.get("idempotency_key", None)

    if project_id is None:
        raise ValueError("Invalid project ID")

    # if no version_group_id is supplied, reuse the version group of the most
    # recently created flow with the same name in this project, if there is one
    if not version_group_id:
        flow = await models.Flow.where(
            {
                "project_id": {"_eq": project_id},
                "name": {"_eq": serialized_flow.get("name")},
            }
        ).first(
            order_by={"created": EnumValue("desc")}, selection_set={"version_group_id"}
        )
        if flow:
            version_group_id = flow.version_group_id  # type:ignore

    flow_id = await api.flows.create_flow(
        project_id=project_id,
        serialized_flow=serialized_flow,
        version_group_id=version_group_id,
        set_schedule_active=set_schedule_active,
        description=description,
        idempotency_key=idempotency_key,
    )

    # archive all other versions
    if version_group_id:
        all_other_unarchived_versions = await models.Flow.where(
            {
                "version_group_id": {"_eq": version_group_id},
                "id": {"_neq": flow_id},
                "archived": {"_eq": False},
            }
        ).get(
            {"id"}
        )  # type: Any

        for version in all_other_unarchived_versions:
            await api.flows.archive_flow(version.id)  # type: ignore

    return {"id": flow_id}
async def schedule_flows(self, n_flows=100) -> int:
    """
    Schedule flow runs for the least recently checked active schedules.

    Args:
        - n_flows (int): the maximum number of flows to schedule

    Returns:
        - int: The number of scheduled runs
    """
    now = pendulum.now("utc")

    # schedule has already started, or will start within the next day
    has_started = {
        "_or": [
            {"schedule_start": {"_lte": str(now.add(days=1))}},
            {"schedule_start": {"_is_null": True}},
        ]
    }
    # schedule has not yet ended
    has_not_ended = {
        "_or": [
            {"schedule_end": {"_gte": str(now)}},
            {"schedule_end": {"_is_null": True}},
        ]
    }

    # load at most `n_flows` rows from the schedules table, never-checked
    # schedules first
    schedules = await models.Schedule.where(
        {
            # schedule is active
            "active": {"_eq": True},
            # ensure the flow is not archived
            "flow": {"archived": {"_eq": False}},
            "_and": [has_started, has_not_ended],
        }
    ).get(
        selection_set={"id", "flow_id", "last_checked"},
        order_by=[{"last_checked": EnumValue("asc_nulls_first")}],
        limit=n_flows,
    )

    # concurrently schedule all runs
    all_run_ids = await asyncio.gather(
        *(
            api.schedules.schedule_flow_runs(
                schedule.id, seconds_since_last_checked=60
            )
            for schedule in schedules
        )
    )

    runs_scheduled = sum(len(ids) for ids in all_run_ids)
    self.logger.info(f"Scheduled {runs_scheduled} flow runs.")
    return runs_scheduled
async def reap_zombie_cancelling_flow_runs(
    self, heartbeat_cutoff: datetime.datetime = None
) -> int:
    """
    Marks flow runs that are in a `Cancelling` state but fail to move to a
    `Cancelled` state as `Failed`. The runs to handle are selected by
    `self.get_flow_runs_where_clause`.

    Args:
        - heartbeat_cutoff (datetime.datetime, optional): cutoff forwarded to
            `get_flow_runs_where_clause`; defaults to 10 minutes before now (UTC)

    Returns:
        - int: the number of flow runs that were handled
    """
    if not heartbeat_cutoff:
        heartbeat_cutoff = pendulum.now("utc").subtract(minutes=10)

    where_clause = await self.get_flow_runs_where_clause(
        heartbeat_cutoff=heartbeat_cutoff
    )
    # at most 5000 runs per call, most recently updated first
    flow_runs = await models.FlowRun.where(where_clause).get(
        selection_set={"id", "tenant_id"},
        limit=5000,
        order_by={"updated": EnumValue("desc")},
    )

    if flow_runs:
        self.logger.info(f"Zombie killer found {len(flow_runs)} flow runs.")

    message = "No heartbeat detected from the flow run; marking the run as failed."
    zombies = 0
    for run in flow_runs:
        try:
            # Set the flow run state to failed
            await prefect.api.states.set_flow_run_state(
                flow_run_id=run.id,
                state=Failed(message=message),
            )
            # log the state change to the flow run
            log_entry = dict(
                tenant_id=run.tenant_id,
                flow_run_id=run.id,
                name=f"{self.logger.name}.FlowRun",
                message=message,
                level="ERROR",
            )
            await prefect.api.logs.create_logs([log_entry], defer_db_write=False)
            # only counted once both the state change and the log write succeeded
            zombies += 1
        except ValueError:
            # a ValueError on one run must not stop the rest from being handled
            self.logger.error("Error updating flow run %s", run.id, exc_info=True)

    if zombies:
        self.logger.info(f"Addressed {zombies} zombie flow runs.")

    return zombies
def test_tenant_view_query_for_tenants_uses_order_by_in_query(monkeypatch):
    """The `order_by` argument must appear in the GraphQL query that is posted."""
    fake_response = {"data": {"tenant": [TENANT_DATA_1]}}
    post = MagicMock(return_value=fake_response)
    monkeypatch.setattr("prefect.client.client.Client.post", post)

    TenantView._query_for_tenants(where={}, order_by={"foo": EnumValue("asc")})

    # inspect the query string passed to the patched `post` via keyword arguments
    sent_query = post.call_args[1]["params"]["query"]
    assert "tenant(where: {}, order_by: { foo: asc })" in sent_query
async def query_upstream(*ids):
    """
    Run the `utility_upstream_tasks` query starting from the given task ids.

    Args:
        - *ids: the ids of the tasks to start the traversal from

    Returns:
        The result of executing the query, selecting each entry's task slug and
        depth, ordered by ascending depth and then ascending task slug
    """
    traversal_args = {
        "args": {"start_task_ids": LiteralSetValue(list(ids))},
        "order_by": {
            "depth": EnumValue("asc"),
            "task": {"slug": EnumValue("asc")},
        },
    }
    selection = {"task": {"slug"}, "depth": True}
    query = {"query": {with_args("utility_upstream_tasks", traversal_args): selection}}
    return await prefect.plugins.hasura.client.execute(query)
async def test_get_or_create_mapped_children_handles_partial_children(
    self, flow_id, flow_run_id
):
    """Pre-existing mapped children are kept when the missing ones are filled in."""
    # get a task from the flow
    task = await models.Task.where({"flow_id": {"_eq": flow_id}}).first(
        {"id", "cache_key"}
    )

    # create a mapped child without any state at index 3 ...
    await models.TaskRun(
        flow_run_id=flow_run_id,
        task_id=task.id,
        map_index=3,
        cache_key=task.cache_key,
    ).insert()

    # ... and one that already has a single Pending state at index 6
    pending_state = models.TaskRunState(
        **models.TaskRunState.fields_from_state(Pending(message="Task run created")),
    )
    stateful_child = await models.TaskRun(
        flow_run_id=flow_run_id,
        task_id=task.id,
        map_index=6,
        cache_key=task.cache_key,
        states=[pending_state],
    ).insert()

    # retrieve mapped children
    mapped_children = await api.runs.get_or_create_mapped_task_run_children(
        flow_run_id=flow_run_id, task_id=task.id, max_map_index=10
    )
    assert len(mapped_children) == 11

    # each of the mapped children has a state and they come back in index order
    map_indices = []
    for child_id in mapped_children:
        child_run = await models.TaskRun.where(id=child_id).first(
            {
                "map_index": True,
                with_args(
                    "states",
                    {"order_by": {"version": EnumValue("desc")}, "limit": 1},
                ): {"id"},
            }
        )
        map_indices.append(child_run.map_index)
        assert child_run.states[0] is not None

    assert map_indices == sorted(map_indices)

    # the child created with a state still only has that one state
    child_states = await models.TaskRunState.where(
        {"task_run_id": {"_eq": stateful_child}}
    ).get()
    assert len(child_states) == 1
async def create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
    idempotency_key: str = None,
) -> Any:
    """
    Creates a new flow run for an existing flow.

    Args:
        - flow_id (str): A string representing the current flow id
        - parameters (dict, optional): A dictionary of parameters that were specified for
            the flow
        - context (dict, optional): A dictionary of context values
        - scheduled_start_time (datetime.datetime): When the flow_run should be scheduled
            to run. If `None`, defaults to right now. Must be UTC.
        - flow_run_name (str, optional): An optional string representing this flow run
        - version_group_id (str, optional): An optional version group ID; if provided,
            will run the most recent unarchived version of the group
        - idempotency_key (str, optional): An optional idempotency key to prevent
            duplicate run creation. Only runs created within the last day are
            considered when looking for an existing run with the same key.

    Returns:
        The id of the matching existing run when the idempotency key was already
        used, otherwise the id of the newly created run
    """
    use_idempotency = idempotency_key is not None

    if use_idempotency:
        # look for the most recent run from the last day that used the same key,
        # narrowed to the same flow and/or version group when those were given
        where = {
            "idempotency_key": {"_eq": idempotency_key},
            "created": {"_gt": str(pendulum.now().subtract(days=1))},
        }
        if flow_id is not None:
            where["flow_id"] = {"_eq": flow_id}
        if version_group_id is not None:
            where["flow"] = {"version_group_id": {"_eq": version_group_id}}

        existing_run = await models.FlowRun.where(where).first(
            {"id"}, order_by={"created": EnumValue("desc")}
        )
        if existing_run is not None:
            return existing_run.id

    flow_run_id = await _create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        context=context,
        scheduled_start_time=scheduled_start_time,
        flow_run_name=flow_run_name,
        version_group_id=version_group_id,
    )

    if use_idempotency:
        # record the key on the new run so that later calls can find it
        await models.FlowRun.where(id=flow_run_id).update(
            {"idempotency_key": idempotency_key}
        )

    return flow_run_id
async def run_once(self) -> int:
    """
    Schedule runs for every unarchived flow with an active schedule.

    Returns:
        - int: The number of scheduled runs
    """
    batch_size = 500
    offset = 0
    runs_scheduled = 0

    # visit all flows in batches of 500
    while True:
        flows = await models.Flow.where(
            {
                # schedule is active
                "is_schedule_active": {"_eq": True},
                # flow is not archived
                "archived": {"_eq": False},
            }
        ).get(
            selection_set={"id"},
            # deterministic sort for batching
            order_by=[{"id": EnumValue("desc")}],
            limit=batch_size,
            offset=offset,
        )

        if not flows:
            break
        offset += batch_size

        # concurrently schedule all runs; failures come back as exception
        # objects in the result list instead of being raised
        outcomes = await asyncio.gather(
            *(
                api.flows.schedule_flow_runs(
                    flow.id,
                    max_runs=config.services.towel.max_scheduled_runs_per_flow,
                )
                for flow in flows
            ),
            return_exceptions=True,
        )

        # only lists are counted, which skips the exception objects
        for outcome in outcomes:
            if isinstance(outcome, list):
                runs_scheduled += len(outcome)

    self.logger.info(f"Scheduled {runs_scheduled} flow runs.")
    return runs_scheduled
async def test_new_run_state_is_in_history(self, simple_flow_id):
    """A new flow run's state history holds its `Scheduled` state."""
    dt = pendulum.datetime(2020, 1, 1)
    flow_run_id = await api.runs.create_flow_run(
        flow_id=simple_flow_id, scheduled_start_time=dt
    )

    # state history for the run, oldest first
    history = await models.FlowRunState.where(
        {"flow_run_id": {"_eq": flow_run_id}}
    ).get(
        {"state", "start_time", "message"},
        order_by={"timestamp": EnumValue("asc")},
    )

    assert len(history) == 2
    scheduled = history[1]
    assert scheduled.state == "Scheduled"
    assert scheduled.start_time == dt
    assert scheduled.message == "Flow run scheduled."
def from_flow_name(
    cls, flow_name: str, project_name: str = "", last_updated: bool = False
) -> "FlowView":
    """
    Get an instance of this class given a flow name. Optionally, a project name can
    be included since flow names are not guaranteed to be unique across projects.

    Args:
        - flow_name: The name of the flow to lookup
        - project_name: The name of the project to lookup. If `None`, flows with an
            explicitly null project will be searched. If `""` (default), the
            lookup will be across all projects.
        - last_updated: By default, if multiple flows are found an error will be
            thrown. If `True`, the first flow in descending `created` order is
            returned instead.

    Returns:
        A new instance of FlowView

    Raises:
        - ValueError: if more than one flow matches and `last_updated` is not set
    """
    # only unarchived flows with the requested name are considered
    where: Dict[str, Any] = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
    }

    # the empty string means "any project": no project filter is added at all
    if project_name != "":
        if project_name:
            project_filter = {"_eq": project_name}
        else:
            project_filter = {"_is_null": True}
        where["project"] = {"name": project_filter}

    flows = cls._query_for_flows(
        where=where,
        order_by={"created": EnumValue("desc")},
    )

    if len(flows) > 1 and not last_updated:
        raise ValueError(
            f"Found multiple flows matching {where}. "
            "Provide a `project_name` as well or toggle `last_updated` "
            "to use the flow that was most recently updated"
        )

    return cls._from_flow_data(flows[0])
async def run_once(self) -> int:
    """
    Schedule runs for every unarchived flow with an active schedule.

    Returns:
        - int: The number of scheduled runs
    """
    batch_size = 500
    offset = 0
    runs_scheduled = 0

    # flows are ordered by the maximum scheduled start time of their runs,
    # ascending, with nulls last
    batch_order = [
        {
            "flow_runs_aggregate": {
                "max": {"scheduled_start_time": EnumValue("asc_nulls_last")}
            }
        }
    ]

    # visit all flows in batches of 500
    while True:
        flows = await models.Flow.where(
            {
                # schedule is active
                "is_schedule_active": {"_eq": True},
                # flow is not archived
                "archived": {"_eq": False},
            }
        ).get(
            selection_set={"id"},
            order_by=batch_order,
            limit=batch_size,
            offset=offset,
        )

        if not flows:
            break
        offset += batch_size

        # concurrently schedule all runs
        all_run_ids = await asyncio.gather(
            *(api.flows.schedule_flow_runs(flow.id) for flow in flows)
        )
        for ids in all_run_ids:
            runs_scheduled += len(ids)

    self.logger.info(f"Scheduled {runs_scheduled} flow runs.")
    return runs_scheduled
def set_task_run_state(
    self,
    task_run_id: str,
    version: int,
    state: "prefect.engine.state.State",
    cache_for: datetime.timedelta = None,
) -> None:
    """
    Sets new state for a task run.

    Args:
        - task_run_id (str): the id of the task run to set state for
        - version (int): the current version of the task run state
        - state (State): the new state for this task run
        - cache_for (timedelta, optional): how long to store the result of this task for,
            using the serializer set in config; if not provided, no caching occurs.
            NOTE(review): this argument is not read anywhere in this method.

    Raises:
        - ClientError: if the GraphQL mutation is bad for any reason
    """
    # The state is referenced through the `$state` GraphQL variable declared in
    # the mutation header; its serialized value is sent as a query variable.
    mutation_input = {
        "taskRunId": task_run_id,
        "version": version,
        "state": EnumValue("$state"),
    }
    mutation = {
        "mutation($state: JSON!)": {
            with_args("setTaskRunState", {"input": mutation_input}): {"id"}
        }
    }

    self.graphql(mutation, variables=dict(state=state.serialize()))  # type: Any
def from_flow_group_id(cls, flow_group_id: str) -> "FlowView":
    """
    Get an instance of this class given a `flow_group_id` to lookup; the newest
    flow in the flow group will be retrieved

    Args:
        - flow_group_id: The uuid of the flow group

    Returns:
        A new instance of FlowView

    Raises:
        - TypeError: if `flow_group_id` is not a string
    """
    if not isinstance(flow_group_id, str):
        raise TypeError(
            f"Unexpected type {type(flow_group_id)!r} for `flow_group_id`, "
            f"expected 'str'."
        )

    # Newest first, so the first entry is the most recently created flow
    flows_in_group = cls._query_for_flows(
        where={"flow_group_id": {"_eq": flow_group_id}},
        order_by={"created": EnumValue("desc")},
    )
    newest = flows_in_group[0]
    return cls._from_flow_data(newest)
def cloud(name, project, version, watch, logs):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT        The name of a flow to run                                       [required]
        --project, -p   TEXT        The name of a project that contains the flow                    [required]
        --version, -v   INTEGER     A flow version to run
        --watch, -w                 Watch current state of the flow run, stream output to stdout
        --logs, -l                  Get logs of the flow run, stream output to stdout
    """
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    # Look up the id of the flow to run.
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {"_eq": name},
                            "version": {"_eq": version},
                            "project": {"name": {"_eq": project}},
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    # A single client is used for every request made by this command.
    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow
    if not flow_data:
        click.secho("{} not found".format(name), fg="red")
        return
    flow_id = flow_data[0].id

    flow_run_id = client.create_flow_run(flow_id=flow_id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    if watch:
        # The query does not change between polls, so it is built once.
        states_query = {
            "query": {
                with_args("flow_run_by_pk", {"id": flow_run_id}): {
                    with_args(
                        "states",
                        {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                    ): {"state": True, "timestamp": True}
                }
            }
        }

        current_states = []
        while True:
            result = client.graphql(states_query)

            # Filter through retrieved states and output in order; each state
            # name is printed once, and a final state ends the command
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
        }
        logs_query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }
        # Used after each batch of logs to check whether the run has finished.
        state_query = {
            "query": {
                with_args("flow_run_by_pk", {"id": flow_run_id}): {"state": True}
            }
        }

        # Every log row printed so far, used to skip rows already shown.
        all_logs = []
        while True:
            result = client.graphql(logs_query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            output = []
            for entry in flow_run[0].logs:
                row = [entry.timestamp, entry.level, entry.message]
                if row in all_logs:
                    continue

                if not all_logs:
                    # The very first row is printed on its own, with the headers.
                    click.echo(
                        tabulate(
                            [row],
                            headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                            tablefmt="plain",
                            numalign="left",
                            stralign="left",
                        )
                    )
                else:
                    output.append(row)
                all_logs.append(row)

            if output:
                click.echo(
                    tabulate(
                        output, tablefmt="plain", numalign="left", stralign="left"
                    )
                )

            # Check if state is either Success or Failed, exit if it is
            result = client.graphql(state_query)
            if result.data.flow_run_by_pk.state in ("Success", "Failed"):
                return

            time.sleep(3)
def flows(name, version, project, limit, all_versions):
    """
    Query information regarding your Prefect flows.

    \b
    Options:
        --name, -n      TEXT    A flow name to query
        --version, -v   TEXT    A flow version to query
        --project, -p   TEXT    The name of a project to query
        --limit, -l     INTEGER A limit amount of flows to query, defaults to 10
        --all-versions          Output all versions of a flow, default shows most recent
    """
    # De-duplicate on flow name unless every version was asked for.
    distinct_on = None if all_versions else EnumValue("name")

    flow_args = {
        "where": {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        },
        "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
        "distinct_on": distinct_on,
        "limit": limit,
    }
    selection = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
    }

    response = Client().graphql({"query": {with_args("flow", flow_args): selection}})

    # One table row per flow returned by the backend.
    rows = [
        [
            item.name,
            item.version,
            item.project.name,
            pendulum.parse(item.created).diff_for_humans(),
        ]
        for item in response.data.flow
    ]

    click.echo(
        tabulate(
            rows,
            headers=["NAME", "VERSION", "PROJECT NAME", "AGE"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        )
    )
def logs(name, info):
    """
    Query logs for a flow run.

    \b
    Options:
        --name, -n      TEXT    A flow run name to query [required]
        --info, -i              Retrieve detailed logging info
    """
    # Detailed mode asks for the `info` payload instead of message and level.
    if info:
        log_fields = {"timestamp": True, "info": True}
    else:
        log_fields = {"timestamp": True, "message": True, "level": True}

    log_query = {
        with_args(
            "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
        ): log_fields,
        "start_time": True,
    }
    run_args = {
        "where": {"name": {"_eq": name}},
        "order_by": {EnumValue("start_time"): EnumValue("desc")},
    }

    response = Client().graphql(
        {"query": {with_args("flow_run", run_args): log_query}}
    )
    matching_runs = response.data.flow_run

    if not matching_runs:
        click.secho("{} not found".format(name), fg="red")
        return

    # Only the first run (latest start time first) is shown.
    entries = matching_runs[0].logs

    if info:
        for entry in entries:
            click.echo(entry.info)
        return

    rows = [[entry.timestamp, entry.level, entry.message] for entry in entries]
    click.echo(
        tabulate(
            rows,
            headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        )
    )
def tasks(name, flow_name, flow_version, project, limit):
    """
    Query information regarding your Prefect tasks.

    \b
    Options:
        --name, -n          TEXT    A task name to query
        --flow-name, -fn    TEXT    A flow name to query
        --flow-version, -fx INTEGER A flow version to query
        --project, -p       TEXT    The name of a project to query
        --limit, -l         INTEGER A limit amount of tasks to query, defaults to 10
    """
    # Filter on the task name and on the flow (name, project, version) it is in.
    owning_flow = {
        "name": {"_eq": flow_name},
        "project": {"name": {"_eq": project}},
        "version": {"_eq": flow_version},
    }
    task_args = {
        "where": {"_and": {"name": {"_eq": name}, "flow": owning_flow}},
        "limit": limit,
        "order_by": {"created": EnumValue("desc")},
    }
    selection = {
        "name": True,
        "created": True,
        "flow": {"name": True, "version": True},
        "mapped": True,
        "type": True,
    }

    response = Client().graphql({"query": {with_args("task", task_args): selection}})

    # One table row per task returned by the backend.
    rows = [
        [
            item.name,
            item.flow.name,
            item.flow.version,
            pendulum.parse(item.created).diff_for_humans(),
            item.mapped,
            item.type,
        ]
        for item in response.data.task
    ]

    click.echo(
        tabulate(
            rows,
            headers=["NAME", "FLOW NAME", "FLOW VERSION", "AGE", "MAPPED", "TYPE"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        )
    )
def flow_runs(limit, flow, project, started):
    """
    Query information regarding Prefect flow runs.

    \b
    Options:
        --limit, -l     INTEGER A limit amount of flow runs to query, defaults to 10
        --flow, -f      TEXT    Name of a flow to query for runs
        --project, -p   TEXT    Name of a project to query
        --started, -s           Only retrieve started flow runs, default shows `Scheduled` runs
    """
    # Both modes filter on the owning flow's name and project name.
    flow_filter = {
        "_and": {
            "name": {"_eq": flow},
            "project": {"name": {"_eq": project}},
        }
    }

    if started:
        # started runs only (non-null start time), latest start first
        order = {"start_time": EnumValue("desc")}
        where = {
            "_and": {
                "flow": flow_filter,
                "start_time": {"_is_null": False},
            }
        }
    else:
        # no start-time filter, most recently created first
        order = {"created": EnumValue("desc")}
        where = {"flow": flow_filter}

    query = {
        "query": {
            with_args(
                "flow_run", {"where": where, "limit": limit, "order_by": order}
            ): {
                "flow": {"name": True},
                "created": True,
                "state": True,
                "name": True,
                "duration": True,
                "start_time": True,
            }
        }
    }

    result = Client().graphql(query)

    output = []
    for item in result.data.flow_run:
        # runs that have not started yet have no start time to format
        start_time = (
            pendulum.parse(item.start_time).to_datetime_string()
            if item.start_time
            else None
        )
        output.append(
            [
                item.name,
                item.flow.name,
                item.state,
                pendulum.parse(item.created).diff_for_humans(),
                start_time,
                item.duration,
            ]
        )

    click.echo(
        tabulate(
            output,
            headers=["NAME", "FLOW NAME", "STATE", "AGE", "START TIME", "DURATION"],
            tablefmt="plain",
            numalign="left",
            stralign="left",
        )
    )