Beispiel #1
0
    async def test_no_end_if_running_is_last_state(self, flow_run_id):
        """
        A flow run whose most recent state is Running is expected to expose a
        start time but neither an end time nor a duration.
        """
        running_at = pendulum.now("UTC")
        common = {"flow_run_id": flow_run_id, "serialized_state": {}}

        # Only the Running state carries an explicit timestamp.
        pending_state = models.FlowRunState(version=0, state="Pending", **common)
        running_state = models.FlowRunState(
            version=1, state="Running", timestamp=running_at, **common
        )
        await models.FlowRunState.insert_many([pending_state, running_state])

        selection = {"start_time", "end_time", "duration"}
        flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

        assert flow_run.start_time == running_at
        assert flow_run.end_time is None
        assert flow_run.duration is None
    async def test_start_and_end_from_running_state(self, flow_run_id):
        """
        The run's start time is expected to match the Running state's timestamp
        and its end time the Failed state's timestamp, even though Retrying
        states are inserted afterwards.
        """
        common = {"flow_run_id": flow_run_id, "serialized_state": {}}
        started_at = pendulum.now("UTC")
        ended_at = pendulum.now("UTC").add(days=1)

        history = [
            models.FlowRunState(version=0, state="Pending", **common),
            models.FlowRunState(
                version=1, state="Running", timestamp=started_at, **common
            ),
            models.FlowRunState(
                version=2, state="Failed", timestamp=ended_at, **common
            ),
            models.FlowRunState(version=3, state="Retrying", **common),
            models.FlowRunState(version=4, state="Retrying", **common),
        ]
        await models.FlowRunState.insert_many(history)

        selection = {"start_time", "end_time", "duration"}
        flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

        expected_duration = (ended_at - started_at).as_timedelta()
        assert flow_run.start_time == started_at
        assert flow_run.end_time == ended_at
        assert flow_run.duration == expected_duration
    async def test_version_order_determines_timestamp(self, flow_run_id):
        """
        States are inserted out of version order; the run's end time is
        expected to come from the highest-version Success state (version 9),
        not from the Success state that was inserted before it (version 5).
        """
        common = {"flow_run_id": flow_run_id, "serialized_state": {}}

        started_at = pendulum.now("UTC")
        ended_at = pendulum.now("UTC").add(days=1)
        decoy_end = pendulum.now("UTC").add(hours=1)

        # (version, state name, explicit timestamp or None), listed in the
        # exact order the rows are inserted. Version 5 deliberately comes
        # after version 8.
        rows = [
            (0, "Pending", None),
            (1, "Running", started_at),
            (2, "Failed", None),
            (3, "Retrying", None),
            (4, "Running", None),
            (6, "Failed", None),
            (7, "Retrying", None),
            (8, "Running", None),
            (5, "Success", decoy_end),
            (9, "Success", ended_at),
        ]

        history = []
        for version, state_name, stamp in rows:
            # Omit the timestamp kwarg entirely when none was specified, so
            # the model is built with exactly the same arguments as before.
            extra = {} if stamp is None else {"timestamp": stamp}
            history.append(
                models.FlowRunState(
                    **common, version=version, state=state_name, **extra
                )
            )
        await models.FlowRunState.insert_many(history)

        selection = {"start_time", "end_time", "duration"}
        flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

        assert flow_run.start_time == started_at
        assert flow_run.end_time == ended_at
        assert flow_run.duration == (ended_at - started_at).as_timedelta()
Beispiel #4
0
    async def test_nested_insert_array(self, flow_id):
        """Insert a flow run together with an array of nested state objects."""
        nested_states = [
            m.FlowRunState(state="test", serialized_state={}) for _ in range(2)
        ]
        new_run = m.FlowRun(flow_id=flow_id, states=nested_states)
        flow_run_id = await new_run.insert()

        # Both nested states should have been stored against the new run.
        state_filter = {"flow_run_id": {"_eq": flow_run_id}}
        stored_count = await m.FlowRunState.where(state_filter).count()
        assert stored_count == 2
    async def test_inserting_running_state_has_effect(self, flow_run_id):
        """
        After inserting a state history that includes a Running state, the
        run is expected to have its start time, end time and duration set.
        """
        common = {"flow_run_id": flow_run_id, "serialized_state": {}}
        state_names = ["Pending", "Running", "Failed", "Retrying", "Retrying"]

        # Versions are assigned sequentially from 0 in list order.
        history = [
            models.FlowRunState(**common, version=version, state=state_name)
            for version, state_name in enumerate(state_names)
        ]
        await models.FlowRunState.insert_many(history)

        selection = {"start_time", "end_time", "duration"}
        flow_run = await models.FlowRun.where(id=flow_run_id).first(selection)

        for field_name in ("start_time", "end_time", "duration"):
            assert getattr(flow_run, field_name) is not None
Beispiel #6
0
async def set_flow_run_state(flow_run_id: str, state: State) -> Dict[str, str]:
    """
    Record a new state for a flow run.

    Args:
        - flow_run_id (str): ID of the flow run that receives the state
        - state (State): the state to record

    Returns:
        - Dict[str, str]: `{"status": "SUCCESS"}` once the state row has
            been inserted

    Raises:
        - ValueError: if `flow_run_id` is None or matches no flow run
    """
    if flow_run_id is None:
        raise ValueError("Invalid flow run ID.")

    run_filter = {"id": {"_eq": flow_run_id}}
    selection = {"id": True, "state": True, "name": True, "version": True}
    existing_run = await models.FlowRun.where(run_filter).first(selection)
    if not existing_run:
        raise ValueError(f"Invalid flow run ID: {flow_run_id}.")

    # The new state takes the next version after the run's current one;
    # a run with no version yet is treated as version 0.
    next_version = (existing_run.version or 0) + 1

    new_state_row = models.FlowRunState(
        flow_run_id=flow_run_id,
        version=next_version,
        state=type(state).__name__,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        # not every state type defines a start time
        start_time=getattr(state, "start_time", None),
        serialized_state=state.serialize(),
    )
    await new_state_row.insert()

    return {"status": "SUCCESS"}
Beispiel #7
0
async def _create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
) -> Any:
    """
    Creates a new flow run for an existing flow.

    The run is inserted with an initial `Pending` state and is then moved to a
    `Scheduled` state via `api.states.set_flow_run_state`.

    Args:
        - flow_id (str): A string representing the current flow id
        - parameters (dict, optional): A dictionary of parameters that were specified for the flow.
            These are layered on top of the flow group's default parameters.
        - context (dict, optional): A dictionary of context values
        - scheduled_start_time (datetime.datetime): When the flow_run should be scheduled to run. If `None`,
            defaults to right now. Must be UTC.
        - flow_run_name (str, optional): An optional string representing this flow run
        - version_group_id (str, optional): An optional version group ID; if provided, will run the most
            recent unarchived version of the group

    Returns:
        - the ID of the newly inserted flow run, as returned by `FlowRun.insert()`

    Raises:
        - ValueError: if neither `flow_id` nor `version_group_id` is provided, if the
            flow is archived, or if a required parameter is missing
        - exceptions.NotFound: if no matching flow exists
    """

    # Use truthiness rather than `is None`: an empty-string ID would otherwise
    # pass this guard and leave `where_clause` unbound below.
    if not flow_id and not version_group_id:
        raise ValueError(
            "One of flow_id or version_group_id must be provided.")

    # Default to "now" in UTC, as the contract above requires.
    scheduled_start_time = scheduled_start_time or pendulum.now("UTC")

    if flow_id:
        where_clause = {"id": {"_eq": flow_id}}
    else:
        where_clause = {
            "version_group_id": {
                "_eq": version_group_id
            },
            "archived": {
                "_eq": False
            },
        }

    # When looking up by version group, the highest version wins.
    flow = await models.Flow.where(where=where_clause).first(
        {
            "id": True,
            "archived": True,
            "tenant_id": True,
            "parameters": True,
            "flow_group_id": True,
            "flow_group": {
                "default_parameters": True
            },
        },
        order_by={"version": EnumValue("desc")},
    )  # type: Any

    if not flow:
        msg = (f"Flow {flow_id} not found" if flow_id else
               f"Version group {version_group_id} has no unarchived flows.")
        raise exceptions.NotFound(msg)
    elif flow.archived:
        raise ValueError(f"Flow {flow.id} is archived.")

    # Merge the flow group's defaults with the caller's parameters. Work on a
    # copy so the fetched flow group object is not mutated, and tolerate a
    # missing defaults dict.
    run_parameters = dict(flow.flow_group.default_parameters or {})
    run_parameters.update(parameters or {})

    required_parameters = {
        p["name"] for p in (flow.parameters or []) if p["required"]
    }
    missing = required_parameters.difference(run_parameters)
    if missing:
        raise ValueError(f"Required parameters were not supplied: {missing}")

    scheduled_state = Scheduled(message="Flow run scheduled.",
                                start_time=scheduled_start_time)

    run = models.FlowRun(
        tenant_id=flow.tenant_id,
        flow_id=flow_id or flow.id,
        # Persist the merged parameters that were validated above, so that
        # defaults which satisfied a required parameter are part of the run.
        parameters=run_parameters,
        context=context or {},
        scheduled_start_time=scheduled_start_time,
        name=flow_run_name or names.generate_slug(2),
        states=[
            models.FlowRunState(
                tenant_id=flow.tenant_id,
                **models.FlowRunState.fields_from_state(
                    Pending(message="Flow run created")),
            )
        ],
    )

    flow_run_id = await run.insert()

    # apply the flow run's initial state via `set_flow_run_state`
    await api.states.set_flow_run_state(flow_run_id=flow_run_id,
                                        state=scheduled_state)

    return flow_run_id
Beispiel #8
0
# Strong references to in-flight cloud hook tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task with no other referent
# may be garbage-collected before it finishes. Tasks remove themselves from
# this set when done.
_CLOUD_HOOK_TASKS = set()


async def set_flow_run_state(flow_run_id: str,
                             state: State,
                             version: int = None) -> models.FlowRunState:
    """
    Updates a flow run state.

    Args:
        - flow_run_id (str): the flow run id to update
        - state (State): the new state
        - version (int): a version to enforce version-locking

    Returns:
        - models.FlowRunState: the state row that was inserted

    Raises:
        - ValueError: if `flow_run_id` is None, or if no flow run matches the
            lookup. The lookup also fails when the flow group has version
            locking enabled and `version` does not equal the run's version.
    """

    if flow_run_id is None:
        raise ValueError("Invalid flow run ID.")

    where = {
        "id": {
            "_eq": flow_run_id
        },
        "_or": [
            # EITHER version locking is enabled and versions match
            {
                "version": {
                    "_eq": version
                },
                "flow": {
                    "flow_group": {
                        "settings": {
                            "_contains": {
                                "version_locking_enabled": True
                            }
                        }
                    }
                },
            },
            # OR version locking is not enabled
            {
                "flow": {
                    "flow_group": {
                        "_not": {
                            "settings": {
                                "_contains": {
                                    "version_locking_enabled": True
                                }
                            }
                        }
                    }
                }
            },
        ],
    }

    flow_run = await models.FlowRun.where(where).first({
        "id": True,
        "state": True,
        "name": True,
        "version": True,
        "flow": {"id", "name", "flow_group_id", "version_group_id"},
        "tenant": {"id", "slug"},
    })

    if not flow_run:
        raise ValueError(f"State update failed for flow run ID {flow_run_id}")

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR CANCELLED STATES:
    #   - set all non-finished task run states to Cancelled
    if isinstance(state, Cancelled):
        task_runs = await models.TaskRun.where({
            "flow_run_id": {
                "_eq": flow_run_id
            }
        }).get({"id", "serialized_state"})
        to_cancel = [
            t for t in task_runs
            if not state_schema.load(t.serialized_state).is_finished()
        ]
        # For a run with many tasks this may be a lot of tasks - at some point
        # we might want to batch this rather than kicking off lots of asyncio
        # tasks at once.
        # `return_exceptions=True` makes failures come back as results rather
        # than being raised; the results are not inspected here, so a failed
        # task-run update does not stop the flow run's own state change.
        await asyncio.gather(
            *(api.states.set_task_run_state(t.id, state) for t in to_cancel),
            return_exceptions=True,
        )

    # --------------------------------------------------------
    # insert the new state in the database
    # --------------------------------------------------------

    # NOTE(review): the query above selects `tenant {id, slug}` but not
    # `tenant_id`; confirm that `flow_run.tenant_id` is populated here.
    flow_run_state = models.FlowRunState(
        id=str(uuid.uuid4()),
        tenant_id=flow_run.tenant_id,
        flow_run_id=flow_run_id,
        # a run with no version yet is treated as version 0
        version=(flow_run.version or 0) + 1,
        state=type(state).__name__,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        # not every state type defines a start time
        start_time=getattr(state, "start_time", None),
        serialized_state=state.serialize(),
    )

    await flow_run_state.insert()

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR RUNNING AND SUBMITTED STATES:
    #   - update the flow run heartbeat
    if state.is_running() or state.is_submitted():
        await api.runs.update_flow_run_heartbeat(flow_run_id=flow_run_id)

    # --------------------------------------------------------
    # call cloud hooks
    # --------------------------------------------------------

    event = events.FlowRunStateChange(
        flow_run=flow_run,
        state=flow_run_state,
        flow=flow_run.flow,
        tenant=flow_run.tenant,
    )

    # Hooks run in the background so the caller is not blocked on them. Keep
    # a strong reference until the task completes so it cannot be
    # garbage-collected mid-execution.
    hook_task = asyncio.create_task(api.cloud_hooks.call_hooks(event))
    _CLOUD_HOOK_TASKS.add(hook_task)
    hook_task.add_done_callback(_CLOUD_HOOK_TASKS.discard)

    return flow_run_state