Example #1
0
async def create_flow(
    serialized_flow: dict,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    The serialized flow is first deserialized to validate it, then inserted
    together with its schedule, tasks and edges. The new flow's version is one
    greater than the highest existing version in its version group.

    Args:
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a new UUID
            is generated when none is provided
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the serialized flow can not be deserialized, or if the
            flow has a schedule and required parameters that are not covered
            by the parameter defaults of every clock

    """

    # validate that the flow can be deserialized
    try:
        # pass a copy because the load mutates the payload
        f = prefect.serialization.flow.FlowSchema().load(
            copy.deepcopy(serialized_flow))
    except Exception as exc:
        # chain explicitly so the underlying deserialization error is kept
        raise ValueError(f"Invalid flow: {exc}") from exc

    # a scheduled flow is only runnable if every clock supplies a default for
    # each required parameter
    required_names = {p.name for p in f.parameters() if p.required}
    if f.schedule is not None and required_names:
        if not all(required_names <= set(c.parameter_defaults)
                   for c in f.schedule.clocks):
            raise ValueError(
                "Can not schedule a flow that has required parameters.")

    # set up task detail info: "type" and "trigger" come from the serialized
    # payload, the structural flags come from the deserialized flow
    reference_tasks = f.reference_tasks()
    root_tasks = f.root_tasks()
    terminal_tasks = f.terminal_tasks()
    task_info = {
        t["slug"]: {
            "type": t["type"],
            "trigger": t["trigger"]
        }
        for t in serialized_flow["tasks"]
    }
    for t in f.tasks:
        task_info[t.slug].update(
            mapped=any(e.mapped for e in f.edges_to(t)),
            is_reference_task=(t in reference_tasks),
            is_root_task=(t in root_tasks),
            is_terminal_task=(t in terminal_tasks),
        )

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {"version_group_id": {"_eq": version_group_id}}

    # the max is falsy when the version group has no flows yet
    version = (await models.Flow.where(version_where).max({"version"}
                                                          ))["version"] or 0

    # the "environment" key may be present with a None value, in which case a
    # plain `.get("environment", {})` would still return None
    environment = serialized_flow.get("environment")
    core_version = (environment or {}).get("__version__")

    # precompute task ids to make edges easy to add to database
    task_ids = {t.slug: str(uuid.uuid4()) for t in f.tasks}
    flow_id = await models.Flow(
        name=f.name,
        serialized_flow=serialized_flow,
        environment=environment,
        core_version=core_version,
        storage=serialized_flow.get("storage"),
        parameters=serialized_flow.get("parameters"),
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        description=description,
        settings={
            "heartbeat_enabled": True
        },
        schedules=[
            models.Schedule(
                schedule=serialized_flow.get("schedule"),
                active=set_schedule_active,
                schedule_start=f.schedule.start_date,
                schedule_end=f.schedule.end_date,
            )
        ] if f.schedule else [],
        tasks=[
            models.Task(
                id=task_ids[t.slug],
                name=t.name,
                slug=t.slug,
                type=task_info[t.slug]["type"],
                max_retries=t.max_retries,
                tags=list(t.tags),
                retry_delay=t.retry_delay,
                trigger=task_info[t.slug]["trigger"]["fn"],
                mapped=task_info[t.slug]["mapped"],
                auto_generated=getattr(t, "auto_generated", False),
                cache_key=t.cache_key,
                is_reference_task=task_info[t.slug]["is_reference_task"],
                is_root_task=task_info[t.slug]["is_root_task"],
                is_terminal_task=task_info[t.slug]["is_terminal_task"],
            ) for t in f.tasks
        ],
        edges=[
            models.Edge(
                upstream_task_id=task_ids[e.upstream_task.slug],
                downstream_task_id=task_ids[e.downstream_task.slug],
                key=e.key,
                mapped=e.mapped,
            ) for e in f.edges
        ],
    ).insert()

    # schedule runs: look up the schedule row inserted with the flow above
    if set_schedule_active and f.schedule:
        schedule = await models.Schedule.where({
            "flow_id": {
                "_eq": flow_id
            }
        }).first({"id"})
        await api.schedules.schedule_flow_runs(schedule_id=schedule.id)

    return flow_id
Example #2
0
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    The flow is validated, attached to the project's tenant, assigned to a
    flow group (created if it does not exist yet) and inserted together with
    its tasks and edges. The new flow's version is one greater than the
    highest existing version in its version group for that tenant.

    Args:
        - project_id (str): A project id
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a new UUID
            is generated when none is provided
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
        - ValueError: if no project exists for `project_id`
        - ValueError: if the flow has a schedule and required parameters that are
            not covered by the parameter defaults of every clock

    """
    flow = FlowSchema(**serialized_flow)

    # NOTE(review): guards against a missing environment; confirm whether
    # FlowSchema can actually yield None here
    core_version = (flow.environment or {}).get("__version__")

    # core versions before 0.6.1 were used only for internal purposes-- this is our cutoff
    if core_version and module_version.parse(
            core_version) < module_version.parse(config.core_version_cutoff):
        raise ValueError(
            "Prefect Server requires new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}.")

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # check required parameters - can't load a flow that has required params and a schedule
    # NOTE: if we allow schedules to be set via UI in the future, we might skip or
    # refactor this check
    required_names = {p.name for p in flow.parameters if p.required}
    if flow.schedule is not None and required_names:
        if not all(required_names <= set(c.parameter_defaults)
                   for c in flow.schedule.clocks):
            raise ValueError(
                "Can not schedule a flow that has required parameters.")

    # set up task detail info; edges refer to tasks by slug
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    # slugs that receive at least one mapped edge, collected in a single pass
    # so the per-task loop below does not rescan every edge
    mapped_targets = {e.downstream_task for e in flow.edges if e.mapped}
    # when no reference tasks are declared, fall back to the terminal tasks
    reference_tasks = set(flow.reference_tasks) or {
        t.slug
        for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = t.slug in mapped_targets
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {
            "_eq": version_group_id
        },
        "tenant_id": {
            "_eq": tenant_id
        },
    }
    # set up a flow group if it's not already in the system
    flow_group = await models.FlowGroup.where({
        "_and": [
            {
                "tenant_id": {
                    "_eq": tenant_id
                }
            },
            {
                "name": {
                    "_eq": version_group_id
                }
            },
        ]
    }).first({"id", "schedule"})
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

    # the max is falsy when the version group has no flows yet
    version = (await models.Flow.where(version_where).max({"version"}
                                                          ))["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    # (getattr also covers the case where flow_group itself is None)
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # insert the flow together with its tasks and edges; task ids are taken
    # from the deserialized tasks
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        core_version=core_version,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        is_schedule_active=set_schedule_active,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            ) for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            ) for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        await schedule_flow_runs(flow_id=flow_id)

    return flow_id