Beispiel #1
0
    async def test_nested_insert_array(self, flow_id):
        """Insert a flow run with two nested states and check both are stored."""
        # Build the nested state objects up front; they are passed as a list
        # on the parent flow run and inserted together with it.
        nested_states = [
            models.FlowRunState(state="test", serialized_state={})
            for _ in range(2)
        ]
        new_run = models.FlowRun(flow_id=flow_id, states=nested_states)
        flow_run_id = await new_run.insert()

        # Count the state rows that point back at the run we just inserted.
        states_of_run = models.FlowRunState.where(
            {"flow_run_id": {"_eq": flow_run_id}}
        )
        state_count = await states_of_run.count()
        assert state_count == 2
Beispiel #2
0
async def _create_flow_run(
    flow_id: str = None,
    parameters: dict = None,
    context: dict = None,
    scheduled_start_time: datetime.datetime = None,
    flow_run_name: str = None,
    version_group_id: str = None,
    labels: List[str] = None,
    run_config: dict = None,
) -> Any:
    """
    Creates a new flow run for an existing flow.

    The run is inserted with an initial `Pending` state and is then moved to a
    `Scheduled` state through `api.states.set_flow_run_state`.

    Args:
        - flow_id (str): A string representing the current flow id
        - parameters (dict, optional): A dictionary of parameters that were specified for the flow
        - context (dict, optional): A dictionary of context values
        - scheduled_start_time (datetime.datetime): When the flow_run should be scheduled to run. If `None`,
            defaults to right now. Must be UTC.
        - flow_run_name (str, optional): An optional string representing this flow run
        - version_group_id (str, optional): An optional version group ID; if provided, will run the most
            recent unarchived version of the group
        - labels (List[str], optional): a list of labels to apply to this individual flow run
        - run_config (dict, optional): A run-config override for this flow run.

    Returns:
        - the ID returned by inserting the new flow run

    Raises:
        - ValueError: if neither `flow_id` nor `version_group_id` is provided, if the flow is
            archived, or if a required parameter was not supplied
        - exceptions.NotFound: if no matching flow exists
    """

    # Use truthiness rather than `is None`: an empty-string ID would otherwise
    # pass this guard and leave `where_clause` unbound below.
    if not flow_id and not version_group_id:
        raise ValueError(
            "One of flow_id or version_group_id must be provided.")

    scheduled_start_time = scheduled_start_time or pendulum.now()

    # `flow_id` takes precedence; the guard above guarantees that
    # `version_group_id` is set whenever `flow_id` is not.
    if flow_id:
        where_clause = {"id": {"_eq": flow_id}}
    else:
        where_clause = {
            "version_group_id": {
                "_eq": version_group_id
            },
            "archived": {
                "_eq": False
            },
        }

    # Highest version first, so a version-group lookup resolves to the most
    # recent unarchived flow.
    flow = await models.Flow.where(where=where_clause).first(
        {
            "id": True,
            "archived": True,
            "tenant_id": True,
            "environment": True,
            "run_config": True,
            "parameters": True,
            "flow_group_id": True,
            "flow_group": {
                "default_parameters": True,
                "labels": True,
                "run_config": True,
            },
        },
        order_by={"version": EnumValue("desc")},
    )  # type: Any

    if not flow:
        msg = (f"Flow {flow_id} not found" if flow_id else
               f"Version group {version_group_id} has no unarchived flows.")
        raise exceptions.NotFound(msg)
    if flow.archived:
        raise ValueError(f"Flow {flow.id} is archived.")

    # determine active labels; the first source that is set wins, from the
    # most specific (explicit argument) to the least specific (environment)
    if labels is not None:
        run_labels = labels
    elif run_config is not None:
        run_labels = run_config.get("labels") or []
    elif flow.flow_group.labels is not None:
        run_labels = flow.flow_group.labels
    elif flow.flow_group.run_config is not None:
        run_labels = flow.flow_group.run_config.get("labels") or []
    elif flow.run_config is not None:
        run_labels = flow.run_config.get("labels") or []
    elif flow.environment is not None:
        run_labels = flow.environment.get("labels") or []
    else:
        run_labels = []
    # `sorted` builds a new list, so the caller's `labels` / `run_config`
    # (and the fetched flow data) are never reordered in place.
    run_labels = sorted(run_labels)

    # determine active run_config
    if run_config is None:
        if flow.flow_group.run_config is not None:
            run_config = flow.flow_group.run_config
        else:
            run_config = flow.run_config

    # check parameters: start from a copy of the flow group's defaults (an
    # unset value is treated as empty) and overlay the caller's values
    run_parameters = dict(flow.flow_group.default_parameters or {})
    run_parameters.update(parameters or {})
    required_parameters = [p["name"] for p in flow.parameters if p["required"]]
    missing = set(required_parameters).difference(run_parameters)
    if missing:
        raise ValueError(f"Required parameters were not supplied: {missing}")
    state = Scheduled(message="Flow run scheduled.",
                      start_time=scheduled_start_time)

    run = models.FlowRun(
        tenant_id=flow.tenant_id,
        flow_id=flow_id or flow.id,
        labels=run_labels,
        parameters=run_parameters,
        run_config=run_config,
        context=context or {},
        scheduled_start_time=scheduled_start_time,
        name=flow_run_name or names.generate_slug(2),
        states=[
            models.FlowRunState(
                tenant_id=flow.tenant_id,
                **models.FlowRunState.fields_from_state(
                    Pending(message="Flow run created")),
            )
        ],
    )

    flow_run_id = await run.insert()

    # apply the flow run's initial state via `set_flow_run_state`
    await api.states.set_flow_run_state(flow_run_id=flow_run_id, state=state)

    return flow_run_id
Beispiel #3
0
# Strong references to in-flight cloud-hook tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task with no other reference
# may be garbage-collected before it finishes.
_pending_cloud_hook_tasks: set = set()


async def set_flow_run_state(flow_run_id: str,
                             state: State,
                             version: int = None,
                             agent_id: str = None) -> models.FlowRunState:
    """
    Updates a flow run state.

    Args:
        - flow_run_id (str): the flow run id to update
        - state (State): the new state
        - version (int): a version to enforce version-locking
        - agent_id (str): the ID of an agent instance setting the state

    Returns:
        - models.FlowRunState: the newly inserted state

    Raises:
        - ValueError: if `flow_run_id` is `None`, or if no flow run matches the
            ID together with the version-locking condition
    """

    if flow_run_id is None:
        raise ValueError("Invalid flow run ID.")

    where = {
        "id": {
            "_eq": flow_run_id
        },
        "_or": [
            # EITHER version locking is enabled and versions match
            {
                "version": {
                    "_eq": version
                },
                "flow": {
                    "flow_group": {
                        "settings": {
                            "_contains": {
                                "version_locking_enabled": True
                            }
                        }
                    }
                },
            },
            # OR version locking is not enabled
            {
                "flow": {
                    "flow_group": {
                        "_not": {
                            "settings": {
                                "_contains": {
                                    "version_locking_enabled": True
                                }
                            }
                        }
                    }
                }
            },
        ],
    }

    flow_run = await models.FlowRun.where(where).first({
        "id": True,
        "state": True,
        "name": True,
        "version": True,
        "flow": {"id", "name", "flow_group_id", "version_group_id"},
        "tenant": {"id", "slug"},
    })

    # A miss covers both "no such run" and "version lock rejected the update";
    # the query cannot tell the two apart.
    if not flow_run:
        raise ValueError(f"State update failed for flow run ID {flow_run_id}")

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR CANCELLED STATES:
    #   - set all non-finished task run states to Cancelled
    if isinstance(state, Cancelled):
        task_runs = await models.TaskRun.where({
            "flow_run_id": {
                "_eq": flow_run_id
            }
        }).get({"id", "serialized_state"})
        to_cancel = [
            t for t in task_runs
            if not state_schema.load(t.serialized_state).is_finished()
        ]
        # For a run with many tasks this may be a lot of tasks - at some point
        # we might want to batch this rather than kicking off lots of asyncio
        # tasks at once.
        # `return_exceptions=True` collects failures as results instead of
        # raising, so one failed task-run update does not abort the flow run
        # state change; the collected results are not inspected.
        await asyncio.gather(
            *(api.states.set_task_run_state(t.id, state) for t in to_cancel),
            return_exceptions=True,
        )

    # --------------------------------------------------------
    # insert the new state in the database
    # --------------------------------------------------------

    # NOTE(review): `tenant_id` is read from `flow_run` but is not part of the
    # selection set above (only `tenant {id, slug}` is) - confirm the model
    # populates it.
    flow_run_state = models.FlowRunState(
        id=str(uuid.uuid4()),
        tenant_id=flow_run.tenant_id,
        flow_run_id=flow_run_id,
        # the new state's version is one past the run's current version
        version=(flow_run.version or 0) + 1,
        state=type(state).__name__,
        timestamp=pendulum.now("UTC"),
        message=state.message,
        result=state.result,
        # not every state carries a `start_time`
        start_time=getattr(state, "start_time", None),
        serialized_state=state.serialize(),
    )

    await flow_run_state.insert()

    # --------------------------------------------------------
    # apply downstream updates
    # --------------------------------------------------------

    # FOR RUNNING STATES:
    #   - update the flow run heartbeat
    if state.is_running() or state.is_submitted():
        await api.runs.update_flow_run_heartbeat(flow_run_id=flow_run_id)

    # Set agent ID on flow run when submitted by agent
    if state.is_submitted() and agent_id:
        await api.runs.update_flow_run_agent(flow_run_id=flow_run_id,
                                             agent_id=agent_id)

    # --------------------------------------------------------
    # call cloud hooks
    # --------------------------------------------------------

    event = events.FlowRunStateChange(
        flow_run=flow_run,
        state=flow_run_state,
        flow=flow_run.flow,
        tenant=flow_run.tenant,
    )

    # Fire-and-forget: the hooks run in the background and are not awaited.
    # Keep a strong reference until the task completes so it cannot be
    # garbage-collected mid-execution.
    hook_task = asyncio.create_task(api.cloud_hooks.call_hooks(event))
    _pending_cloud_hook_tasks.add(hook_task)
    hook_task.add_done_callback(_pending_cloud_hook_tasks.discard)

    return flow_run_state