async def register_tasks(
    flow_id: str, tenant_id: str, tasks: List[Union[TaskSchema, dict]]
) -> None:
    """
    Insert the tasks of a flow into the database in batches.

    Args:
        - flow_id (str): the id of the flow stored on every inserted task
        - tenant_id (str): the id of the tenant stored on every inserted task
        - tasks (List[Union[TaskSchema, dict]]): the tasks to insert; every entry is
            parsed into a `TaskSchema` before insertion

    Returns:
        - None
    """
    batch_insertion_size = config.insert_many_batch_size
    parsed_tasks = parse_obj_as(List[TaskSchema], tasks)

    # a conflict on the (flow_id, slug) constraint updates no columns
    conflict_policy = {"constraint": "task_flow_id_slug_key", "update_columns": []}

    # one `insert_many` call per chunk of at most `batch_insertion_size` tasks
    for batch in chunked_iterable(parsed_tasks, batch_insertion_size):
        task_rows = []
        for task in batch:
            task_rows.append(
                models.Task(
                    id=task.id,
                    flow_id=flow_id,
                    tenant_id=tenant_id,
                    name=task.name,
                    slug=task.slug,
                    type=task.type,
                    max_retries=task.max_retries,
                    tags=task.tags,
                    retry_delay=task.retry_delay,
                    trigger=task.trigger,
                    auto_generated=task.auto_generated,
                    cache_key=task.cache_key,
                )
            )
        await models.Task.insert_many(task_rows, on_conflict=conflict_policy)
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    The flow is inserted as a new version (highest existing version + 1) of its
    version group, together with its tasks and edges. A flow group named after the
    version group is created for the project's tenant if none exists yet.

    Args:
        - project_id (str): A project id
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a random UUID
            is generated when not provided
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
        - ValueError: if no project with the given id is found
    """
    # NOTE(review): another `create_flow` definition (accepting `idempotency_key`)
    # appears later in this source; if both live in the same module the later one
    # shadows this one -- confirm which is intended.
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes-- this is our cutoff
    core_version = flow.environment.get("__version__", None)
    if core_version and module_version.parse(core_version) < module_version.parse(
        config.core_version_cutoff
    ):
        raise ValueError(
            "Prefect Server requires new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}."
        )

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    # slugs of tasks that receive at least one mapped edge; built once so that the
    # per-task check below is a set lookup instead of a scan over every edge
    mapped_tasks = {e.downstream_task for e in flow.edges if e.mapped}
    # explicit reference tasks win; otherwise fall back to the terminal tasks
    reference_tasks = set(flow.reference_tasks) or {
        t.slug for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = t.slug in mapped_tasks
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {"_eq": version_group_id},
        "tenant_id": {"_eq": tenant_id},
    }

    # set up a flow group if it's not already in the system
    flow_group = await models.FlowGroup.where(
        {
            "_and": [
                {"tenant_id": {"_eq": tenant_id}},
                {"name": {"_eq": version_group_id}},
            ]
        }
    ).first({"id", "schedule"})
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

    # highest existing version in this version group, or 0 if there is none
    version = (await models.Flow.where(version_where).max({"version"}))["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # precompute task ids to make edges easy to add to database
    # the flow is always inserted with an inactive schedule; activation, if
    # requested, happens in the separate call below
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        core_version=core_version,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        is_schedule_active=False,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            )
            for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            )
            for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        # we don't want to error the Flow creation call as it would prevent other archiving logic
        # from kicking in
        try:
            await api.flows.set_schedule_active(flow_id=flow_id)
        except ValueError:
            # deliberate best-effort: the flow is still created, just left inactive
            pass

    return flow_id
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
    idempotency_key: str = None,
) -> str:
    """
    Add a flow to the database.

    The flow is inserted as a new version (highest existing version + 1) of its
    version group, together with its tasks and edges. A flow group named after the
    version group is created for the project's tenant if none exists yet. When the
    flow group already exists and its stored idempotency key matches the provided
    one, the id of the most recent unarchived flow in the version group is returned
    instead of creating a new version.

    Args:
        - project_id (str): A project id
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a random UUID
            is generated when not provided
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created
        - idempotency_key (optional, str): a key that, if matching the most recent call
            to `create_flow` for this flow group, will prevent the creation of another
            flow version

    Returns:
        str: The id of the new flow, or of the existing flow when the idempotency
            key matched

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
        - ValueError: if no project with the given id is found
    """
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes-- this is our cutoff
    core_version = flow.__version__
    if core_version and module_version.parse(core_version) < module_version.parse(
        config.core_version_cutoff
    ):
        raise ValueError(
            "Prefect backends require new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}."
        )

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    # slugs of tasks that receive at least one mapped edge; built once so that the
    # per-task check below is a set lookup instead of a scan over every edge
    mapped_tasks = {e.downstream_task for e in flow.edges if e.mapped}
    # explicit reference tasks win; otherwise fall back to the terminal tasks
    reference_tasks = set(flow.reference_tasks) or {
        t.slug for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = t.slug in mapped_tasks
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {"_eq": version_group_id},
        "tenant_id": {"_eq": tenant_id},
    }

    # lookup the associated flow group (may not exist yet)
    flow_group = await models.FlowGroup.where(
        {
            "_and": [
                {"tenant_id": {"_eq": tenant_id}},
                {"name": {"_eq": version_group_id}},
            ]
        }
    ).first({"id", "schedule", "settings"})

    # create the flow group or check for the idempotency key in the existing flow group
    # settings
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
                "idempotency_key": idempotency_key,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

        # check idempotency key and exit early if we find a matching key and flow,
        # otherwise update the key for the group
        last_idempotency_key = flow_group.settings.get("idempotency_key", None)
        if (
            last_idempotency_key
            and idempotency_key
            and last_idempotency_key == idempotency_key
        ):
            # get the most recent unarchived version, there should only be one
            # unarchived flow at a time but it is safer not to presume
            # the lookup is scoped to this tenant (via `version_where`) so a
            # version group id shared with another tenant cannot leak its flow id
            flow_model = await models.Flow.where(
                {**version_where, "archived": {"_eq": False}}
            ).first(order_by={"version": EnumValue("desc")})
            if flow_model:
                return flow_model.id
            # otherwise, despite the key matching we don't have a valid flow to return
            # and will continue as though the key did not match

        # record the key of this call on the group (this also stores `None` when
        # no key was provided)
        settings = flow_group.settings
        settings["idempotency_key"] = idempotency_key
        await models.FlowGroup.where({"id": {"_eq": flow_group.id}}).update(
            set={"settings": settings}
        )

    # highest existing version in this version group, or 0 if there is none
    version = (await models.Flow.where(version_where).max({"version"}))["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # precompute task ids to make edges easy to add to database
    # the flow is always inserted with an inactive schedule; activation, if
    # requested, happens in the separate call below
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        run_config=flow.run_config,
        core_version=core_version,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        is_schedule_active=False,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            )
            for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            )
            for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        # we don't want to error the Flow creation call as it would prevent other archiving logic
        # from kicking in
        try:
            await api.flows.set_schedule_active(flow_id=flow_id)
        except ValueError:
            # deliberate best-effort: the flow is still created, just left inactive
            pass

    return flow_id