Example #1
async def register_tasks(flow_id: str, tenant_id: str,
                         tasks: List[Union[TaskSchema, dict]]) -> None:
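    """Insert a flow's tasks in batches; rows that conflict on the
    task_flow_id_slug_key constraint are left untouched."""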
    batch_insertion_size = config.insert_many_batch_size

    tasks = parse_obj_as(List[TaskSchema], tasks)
    for tasks_chunk in chunked_iterable(tasks, batch_insertion_size):
        await models.Task.insert_many(
            [
                models.Task(
                    id=t.id,
                    flow_id=flow_id,
                    tenant_id=tenant_id,
                    name=t.name,
                    slug=t.slug,
                    type=t.type,
                    max_retries=t.max_retries,
                    tags=t.tags,
                    retry_delay=t.retry_delay,
                    trigger=t.trigger,
                    auto_generated=t.auto_generated,
                    cache_key=t.cache_key,
                ) for t in tasks_chunk
            ],
            on_conflict=dict(constraint="task_flow_id_slug_key",
                             update_columns=[]),
        )
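
Every example on this page leans on chunked_iterable to keep each `insert_many` call small enough for Postgres (Example #5 below links to the relevant limit). The helper itself is not shown here; a minimal sketch, consistent with the tests in Examples #3 and #4 below (tuple chunks, a possibly short final chunk, nothing for an empty input), could look like this:

import itertools
from typing import Iterable, Iterator, Tuple, TypeVar

T = TypeVar("T")

def chunked_iterable(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
    # yield successive tuples of at most `size` items; the final chunk may be
    # shorter, and an empty input yields nothing
    iterator = iter(iterable)
    while chunk := tuple(itertools.islice(iterator, size)):
        yield chunk

Note also that `update_columns=[]` in the `on_conflict` clause leaves conflicting rows untouched, so re-registering the same tasks is effectively idempotent.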
Example #2
async def register_edges(flow_id: str, tenant_id: str,
                         edges: List[Union[EdgeSchema, dict]]) -> None:
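    """Insert a flow's edges in batches, resolving task slugs to task ids;
    rows that conflict on the edge_flow_id_task_ids_key constraint are left
    untouched."""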
    batch_insertion_size = config.insert_many_batch_size
    flow = await models.Flow.where(id=flow_id).first(
        {"tasks": {"slug": True, "id": True}}
    )

    task_lookup = {t.slug: t.id for t in flow.tasks}

    edges = parse_obj_as(List[EdgeSchema], edges)

    try:
        for edges_chunk in chunked_iterable(edges, batch_insertion_size):
            await models.Edge.insert_many(
                [
                    models.Edge(
                        tenant_id=tenant_id,
                        flow_id=flow_id,
                        upstream_task_id=task_lookup[e.upstream_task],
                        downstream_task_id=task_lookup[e.downstream_task],
                        key=e.key,
                        mapped=e.mapped,
                    ) for e in edges_chunk
                ],
                on_conflict=dict(constraint="edge_flow_id_task_ids_key",
                                 update_columns=[]),
            )
    except KeyError:
        raise ValueError(
            "Edges could not be registered - some edges reference tasks that do not exist within this flow."
        ) from None
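
The except-KeyError block translates a low-level dictionary miss into a domain-level error, and `from None` suppresses the chained KeyError traceback so callers only see the ValueError. A self-contained illustration of the pattern (the slugs and ids are hypothetical, not from the source):

task_lookup = {"extract": "id-1", "load": "id-2"}

def resolve_slug(slug: str) -> str:
    try:
        return task_lookup[slug]
    except KeyError:
        # "from None" hides the KeyError so only the ValueError surfaces
        raise ValueError(f"Unknown task slug: {slug!r}") from None

assert resolve_slug("extract") == "id-1"
# resolve_slug("transform") raises ValueError with no chained KeyError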
Example #3
def test_chunked_iterable_of_empty_iterable():
    chunks = list(chunked_iterable([], 4))
    assert len(chunks) == 0
Example #4
def test_chunked_iterable_of_list():
    chunks = list(chunked_iterable(list(range(10)), 4))
    expected_chunks = [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9)]
    assert chunks == expected_chunks
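
Together these tests pin down the contract: chunks are tuples, the final chunk may be shorter than the requested size, and an empty input yields no chunks. A hypothetical companion test (not from the source) for the exactly divisible case would follow the same pattern:

def test_chunked_iterable_of_exactly_divisible_list():
    # hypothetical companion test: no short tail chunk when the input length
    # is an exact multiple of the chunk size
    chunks = list(chunked_iterable(list(range(8)), 4))
    assert chunks == [(0, 1, 2, 3), (4, 5, 6, 7)]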
Example #5
File: flows.py Project: zhangguiyu/server
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
    idempotency_key: str = None,
) -> str:
    """
    Add a flow to the database.

    Args:
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - project_id (str): A project id
        - version_group_id (str, optional): A version group to add the Flow to
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str, optional): A description of the flow being created
        - idempotency_key (str, optional): A key that, if matching the most recent call
            to `create_flow` for this flow group, will prevent the creation of another
            flow version

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff

    """
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes; this is our cutoff
    core_version = flow.__version__
    if core_version and module_version.parse(core_version) < module_version.parse(
        config.core_version_cutoff
    ):
        raise ValueError(
            "Prefect backends require new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}."
        )

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    reference_tasks = set(flow.reference_tasks) or {
        t.slug for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = any(e.mapped for e in flow.edges
                       if e.downstream_task == t.slug)
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {"_eq": version_group_id},
        "tenant_id": {"_eq": tenant_id},
    }

    # lookup the associated flow group (may not exist yet)
    flow_group = await models.FlowGroup.where({
        "_and": [
            {"tenant_id": {"_eq": tenant_id}},
            {"name": {"_eq": version_group_id}},
        ]
    }).first({"id", "schedule", "settings"})

    # create the flow group or check for the idempotency key in the existing flow group
    # settings
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
                "idempotency_key": idempotency_key,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

        # check idempotency key and exit early if we find a matching key and flow,
        # otherwise update the key for the group
        last_idempotency_key = flow_group.settings.get("idempotency_key", None)
        if (last_idempotency_key and idempotency_key
                and last_idempotency_key == idempotency_key):
            # get the most recent unarchived version, there should only be one
            # unarchived flow at a time but it is safer not to presume
            flow_model = await models.Flow.where({
                "version_group_id": {"_eq": version_group_id},
                "archived": {"_eq": False},
            }).first(order_by={"version": EnumValue("desc")})
            if flow_model:
                return flow_model.id
            # otherwise, despite the key matching we don't have a valid flow to return
            # and will continue as though the key did not match

        settings = flow_group.settings
        settings["idempotency_key"] = idempotency_key
        await models.FlowGroup.where({"id": {"_eq": flow_group.id}}).update(
            set={"settings": settings}
        )

    max_version = await models.Flow.where(version_where).max({"version"})
    version = max_version["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # task ids were precomputed above (task_lookup) to make edges easy to add
    # to the database

    # create the flow without tasks or edges initially,
    # then insert tasks and edges in batches, so we don't exceed Postgres limits
    # https://doxygen.postgresql.org/fe-exec_8c_source.html line 1409
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        run_config=flow.run_config,
        core_version=flow.__version__,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        is_schedule_active=False,
        tasks=[],
        edges=[],
    ).insert()

    try:
        batch_insertion_size = 2500

        for tasks_chunk in chunked_iterable(flow.tasks, batch_insertion_size):
            await models.Task.insert_many([
                models.Task(
                    id=t.id,
                    flow_id=flow_id,
                    tenant_id=tenant_id,
                    name=t.name,
                    slug=t.slug,
                    type=t.type,
                    max_retries=t.max_retries,
                    tags=t.tags,
                    retry_delay=t.retry_delay,
                    trigger=t.trigger,
                    mapped=t.mapped,
                    auto_generated=t.auto_generated,
                    cache_key=t.cache_key,
                    is_reference_task=t.is_reference_task,
                    is_root_task=t.is_root_task,
                    is_terminal_task=t.is_terminal_task,
                ) for t in tasks_chunk
            ])

        for edges_chunk in chunked_iterable(flow.edges, batch_insertion_size):
            await models.Edge.insert_many([
                models.Edge(
                    tenant_id=tenant_id,
                    flow_id=flow_id,
                    upstream_task_id=task_lookup[e.upstream_task].id,
                    downstream_task_id=task_lookup[e.downstream_task].id,
                    key=e.key,
                    mapped=e.mapped,
                ) for e in edges_chunk
            ])

    except Exception as exc:
        logger.error("`create_flow` failed during insertion", exc_info=True)
        await api.flows.delete_flow(flow_id=flow_id)
        raise APIError() from exc

    # schedule runs
    if set_schedule_active:
        # we don't want to error the Flow creation call as it would prevent other archiving logic
        # from kicking in
        try:
            await api.flows.set_schedule_active(flow_id=flow_id)
        except Exception:
            logger.error(
                "Failed to set schedule to active while creating flow "
                "(flow_id={flow_id}).",
                exc_info=True,
            )

    return flow_id
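
A hedged usage sketch: my_flow and project_id below are illustrative stand-ins, but serialized_flow is the dictionary Prefect Core produces via Flow.serialize(), and the docstring above states that a repeated call with a matching idempotency_key returns the existing flow id instead of creating a new version.

# illustrative only; assumes a Prefect Core Flow object named my_flow and an
# existing project id
flow_id = await create_flow(
    serialized_flow=my_flow.serialize(),
    project_id=project_id,
    description="nightly ETL",
    idempotency_key="release-42",
)

# re-running the same deployment is a no-op: the matching idempotency key
# short-circuits and the same flow id is returned
same_flow_id = await create_flow(
    serialized_flow=my_flow.serialize(),
    project_id=project_id,
    idempotency_key="release-42",
)
assert same_flow_id == flow_id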