async def test_no_end_if_running_is_last_state(self, flow_run_id):
    """
    A flow run whose latest state is `Running` is expected to expose the
    `Running` timestamp as `start_time`, with no `end_time` or `duration`.
    """
    shared = {"flow_run_id": flow_run_id, "serialized_state": {}}
    started_at = pendulum.now("UTC")

    pending = models.FlowRunState(**shared, version=0, state="Pending")
    running = models.FlowRunState(
        **shared, version=1, state="Running", timestamp=started_at
    )
    await models.FlowRunState.insert_many([pending, running])

    selection = {"start_time", "end_time", "duration"}
    flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

    assert flow_run.start_time == started_at
    assert flow_run.end_time is None
    assert flow_run.duration is None
async def test_start_and_end_from_running_state(self, flow_run_id):
    """
    With a `Running` state followed by a `Failed` state (and later `Retrying`
    states), the run is expected to report the `Running` timestamp as its
    start, the `Failed` timestamp as its end, and their difference as duration.
    """
    shared = {"flow_run_id": flow_run_id, "serialized_state": {}}
    started_at = pendulum.now("UTC")
    ended_at = pendulum.now("UTC").add(days=1)

    # (state name, extra fields), listed in version order starting at 0
    history = [
        ("Pending", {}),
        ("Running", {"timestamp": started_at}),
        ("Failed", {"timestamp": ended_at}),
        ("Retrying", {}),
        ("Retrying", {}),
    ]
    await models.FlowRunState.insert_many(
        [
            models.FlowRunState(**shared, version=number, state=name, **extra)
            for number, (name, extra) in enumerate(history)
        ]
    )

    selection = {"start_time", "end_time", "duration"}
    flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

    assert flow_run.start_time == started_at
    assert flow_run.end_time == ended_at
    assert flow_run.duration == (ended_at - started_at).as_timedelta()
async def test_version_order_determines_timestamp(self, flow_run_id):
    """
    States are inserted out of version order (version 5 comes after version 8
    in the batch). The run is expected to take its end time from the
    highest-version `Success` state, not from the one inserted later in the list.
    """
    shared = {"flow_run_id": flow_run_id, "serialized_state": {}}
    started_at = pendulum.now("UTC")
    ended_at = pendulum.now("UTC").add(days=1)
    decoy_end = pendulum.now("UTC").add(hours=1)

    # (version, state name, extra fields) -- list order is the insertion order
    # and is deliberately NOT sorted by version
    history = [
        (0, "Pending", {}),
        (1, "Running", {"timestamp": started_at}),
        (2, "Failed", {}),
        (3, "Retrying", {}),
        (4, "Running", {}),
        (6, "Failed", {}),
        (7, "Retrying", {}),
        (8, "Running", {}),
        (5, "Success", {"timestamp": decoy_end}),
        (9, "Success", {"timestamp": ended_at}),
    ]
    await models.FlowRunState.insert_many(
        [
            models.FlowRunState(**shared, version=number, state=name, **extra)
            for number, name, extra in history
        ]
    )

    selection = {"start_time", "end_time", "duration"}
    flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

    assert flow_run.start_time == started_at
    assert flow_run.end_time == ended_at
    assert flow_run.duration == (ended_at - started_at).as_timedelta()
async def test_nested_insert_array(self, flow_id):
    """Insert nested objects as an array and check both child rows exist."""
    nested_states = [
        m.FlowRunState(state="test", serialized_state={}) for _ in range(2)
    ]
    flow_run_id = await m.FlowRun(flow_id=flow_id, states=nested_states).insert()

    # count the state rows that point at the flow run we just inserted
    matching = m.FlowRunState.where({"flow_run_id": {"_eq": flow_run_id}})
    state_count = await matching.count()
    assert state_count == 2
async def test_inserting_running_state_has_effect(self, flow_run_id):
    """
    Insert a state history without explicit timestamps and expect the flow
    run's `start_time`, `end_time` and `duration` to all be populated.
    """
    shared = {"flow_run_id": flow_run_id, "serialized_state": {}}

    # state names in version order (versions 0..4)
    state_names = ["Pending", "Running", "Failed", "Retrying", "Retrying"]
    await models.FlowRunState.insert_many(
        [
            models.FlowRunState(**shared, version=number, state=name)
            for number, name in enumerate(state_names)
        ]
    )

    selection = {"start_time", "end_time", "duration"}
    flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

    assert flow_run.start_time is not None
    assert flow_run.end_time is not None
    assert flow_run.duration is not None
async def set_flow_run_state(flow_run_id: str, state: State) -> Dict[str, str]:
    """
    Records a new state for a flow run.

    The flow run is looked up by ID, and a new `FlowRunState` row is inserted
    whose version is one greater than the run's current version (a missing
    version counts as 0).

    Args:
        - flow_run_id (str): the flow run id to update
        - state (State): the new state

    Returns:
        - Dict[str, str]: Mapping indicating status of the state change operation.

    Raises:
        - ValueError: if `flow_run_id` is None or no flow run has that ID
    """
    if flow_run_id is None:
        raise ValueError("Invalid flow run ID.")

    selection = {"id": True, "state": True, "name": True, "version": True}
    current_run = await models.FlowRun.where(
        {"id": {"_eq": flow_run_id}}
    ).first(selection)
    if not current_run:
        raise ValueError(f"Invalid flow run ID: {flow_run_id}.")

    # --------------------------------------------------------
    # insert the new state in the database
    # --------------------------------------------------------
    next_version = (current_run.version or 0) + 1
    new_state_row = models.FlowRunState(
        flow_run_id=flow_run_id,
        version=next_version,
        state=type(state).__name__,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        # not every state carries a start_time attribute
        start_time=getattr(state, "start_time", None),
        serialized_state=state.serialize(),
    )
    await new_state_row.insert()

    return {"status": "SUCCESS"}
async def _create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
) -> Any:
    """
    Creates a new flow run for an existing flow.

    The run is inserted with an initial `Pending` state and is then moved to a
    `Scheduled` state through `api.states.set_flow_run_state`.

    Args:
        - flow_id (str): A string representing the current flow id
        - parameters (dict, optional): A dictionary of parameters that were specified for the flow
        - context (dict, optional): A dictionary of context values
        - scheduled_start_time (datetime.datetime): When the flow_run should be scheduled to run. If `None`,
            defaults to right now. Must be UTC.
        - flow_run_name (str, optional): An optional string representing this flow run
        - version_group_id (str, optional): An optional version group ID; if provided, will run the most
            recent unarchived version of the group

    Returns:
        - the value returned by inserting the new flow run (its ID)

    Raises:
        - ValueError: if neither `flow_id` nor `version_group_id` is provided, if the
            flow is archived, or if a required parameter is not supplied
        - exceptions.NotFound: if no matching flow is found
    """
    # Truthiness (rather than `is None`) so that an empty-string ID is rejected
    # here instead of leaving `where_clause` unassigned further down.
    if not flow_id and not version_group_id:
        raise ValueError("One of flow_id or version_group_id must be provided.")

    # the docstring requires UTC, so the default is generated in UTC as well
    scheduled_start_time = scheduled_start_time or pendulum.now("UTC")

    if flow_id:
        where_clause = {"id": {"_eq": flow_id}}
    else:
        where_clause = {
            "version_group_id": {"_eq": version_group_id},
            "archived": {"_eq": False},
        }

    # highest version first, so a version-group lookup picks the most recent flow
    flow = await models.Flow.where(where=where_clause).first(
        {
            "id": True,
            "archived": True,
            "tenant_id": True,
            "parameters": True,
            "flow_group_id": True,
            "flow_group": {"default_parameters": True},
        },
        order_by={"version": EnumValue("desc")},
    )  # type: Any

    if not flow:
        msg = (
            f"Flow {flow_id} not found"
            if flow_id
            else f"Version group {version_group_id} has no unarchived flows."
        )
        raise exceptions.NotFound(msg)
    elif flow.archived:
        raise ValueError(f"Flow {flow.id} is archived.")

    # check parameters: flow-group defaults overlaid with the caller's values.
    # A fresh dict is built so the fetched flow_group object is not mutated.
    run_parameters = dict(flow.flow_group.default_parameters or {})
    run_parameters.update(parameters or {})
    required_parameters = [p["name"] for p in flow.parameters if p["required"]]
    missing = set(required_parameters).difference(run_parameters)
    if missing:
        raise ValueError(f"Required parameters were not supplied: {missing}")

    state = Scheduled(message="Flow run scheduled.", start_time=scheduled_start_time)

    run = models.FlowRun(
        tenant_id=flow.tenant_id,
        flow_id=flow_id or flow.id,
        # store the merged parameters that were validated above, so defaults
        # that satisfied the required-parameter check are part of the run
        parameters=run_parameters,
        context=context or {},
        scheduled_start_time=scheduled_start_time,
        name=flow_run_name or names.generate_slug(2),
        states=[
            models.FlowRunState(
                tenant_id=flow.tenant_id,
                **models.FlowRunState.fields_from_state(
                    Pending(message="Flow run created")
                ),
            )
        ],
    )

    flow_run_id = await run.insert()

    # apply the flow run's initial state via `set_flow_run_state`
    await api.states.set_flow_run_state(flow_run_id=flow_run_id, state=state)

    return flow_run_id
async def set_flow_run_state(
    flow_run_id: str, state: State, version: int = None
) -> models.FlowRunState:
    """
    Updates a flow run state.

    The flow run is looked up with a filter that matches only when either the
    run's flow group has `version_locking_enabled` set and the run's version
    equals `version`, or the flow group does not have that setting. A new
    `FlowRunState` row is then inserted with the run's version incremented by
    one, and a `FlowRunStateChange` event is handed to the cloud hooks.

    Args:
        - flow_run_id (str): the flow run id to update
        - state (State): the new state
        - version (int): a version to enforce version-locking

    Returns:
        - models.FlowRunState: the newly inserted state record

    Raises:
        - ValueError: if `flow_run_id` is None, or if no flow run matches the
            lookup filter
    """
    if flow_run_id is None:
        raise ValueError("Invalid flow run ID.")

    where = {
        "id": {"_eq": flow_run_id},
        "_or": [
            # EITHER version locking is enabled and versions match
            {
                "version": {"_eq": version},
                "flow": {
                    "flow_group": {
                        "settings": {"_contains": {"version_locking_enabled": True}}
                    }
                },
            },
            # OR version locking is not enabled
            {
                "flow": {
                    "flow_group": {
                        "_not": {
                            "settings": {
                                "_contains": {"version_locking_enabled": True}
                            }
                        }
                    }
                }
            },
        ],
    }

    flow_run = await models.FlowRun.where(where).first(
        {
            "id": True,
            "state": True,
            "name": True,
            "version": True,
            # tenant_id is read below when building the new state record, so
            # it has to be part of the selection
            "tenant_id": True,
            "flow": {"id", "name", "flow_group_id", "version_group_id"},
            "tenant": {"id", "slug"},
        }
    )

    if not flow_run:
        raise ValueError(f"State update failed for flow run ID {flow_run_id}")

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR CANCELLED STATES:
    #   - set all non-finished task run states to Cancelled
    if isinstance(state, Cancelled):
        task_runs = await models.TaskRun.where(
            {"flow_run_id": {"_eq": flow_run_id}}
        ).get({"id", "serialized_state"})
        to_cancel = [
            t
            for t in task_runs
            if not state_schema.load(t.serialized_state).is_finished()
        ]
        # For a run with many tasks this may be a lot of tasks - at some point
        # we might want to batch this rather than kicking off lots of asyncio
        # tasks at once.
        # return_exceptions=True: a failure on one task run does not abort the
        # others; the gathered results (including exceptions) are discarded.
        await asyncio.gather(
            *(api.states.set_task_run_state(t.id, state) for t in to_cancel),
            return_exceptions=True,
        )

    # --------------------------------------------------------
    # insert the new state in the database
    # --------------------------------------------------------

    flow_run_state = models.FlowRunState(
        id=str(uuid.uuid4()),
        tenant_id=flow_run.tenant_id,
        flow_run_id=flow_run_id,
        # a missing version counts as 0
        version=(flow_run.version or 0) + 1,
        state=type(state).__name__,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        # not every state carries a start_time attribute
        start_time=getattr(state, "start_time", None),
        serialized_state=state.serialize(),
    )

    await flow_run_state.insert()

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR RUNNING STATES:
    #   - update the flow run heartbeat
    if state.is_running() or state.is_submitted():
        await api.runs.update_flow_run_heartbeat(flow_run_id=flow_run_id)

    # --------------------------------------------------------
    # call cloud hooks
    # --------------------------------------------------------

    event = events.FlowRunStateChange(
        flow_run=flow_run,
        state=flow_run_state,
        flow=flow_run.flow,
        tenant=flow_run.tenant,
    )

    # The hooks run in the background; this function does not wait for them.
    # NOTE(review): the task handle is not retained. The event loop keeps only
    # weak references to tasks, so consider holding a reference until it is done.
    asyncio.create_task(api.cloud_hooks.call_hooks(event))

    return flow_run_state