async def create_flow(
    serialized_flow: dict,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    Args:
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
    """
    # validate that the flow can be deserialized
    try:
        # pass a copy because the load mutates the payload
        f = prefect.serialization.flow.FlowSchema().load(copy.deepcopy(serialized_flow))
    except Exception as exc:
        raise ValueError(f"Invalid flow: {exc}")

    required_parameters = [p for p in f.parameters() if p.required]
    if f.schedule is not None and required_parameters:
        required_names = {p.name for p in required_parameters}
        if not all(
            required_names <= set(c.parameter_defaults.keys())
            for c in f.schedule.clocks
        ):
            raise ValueError("Can not schedule a flow that has required parameters.")

    # set up task detail info
    reference_tasks = f.reference_tasks()
    root_tasks = f.root_tasks()
    terminal_tasks = f.terminal_tasks()
    task_info = {
        t["slug"]: {"type": t["type"], "trigger": t["trigger"]}
        for t in serialized_flow["tasks"]
    }
    for t in f.tasks:
        task_info[t.slug].update(
            mapped=any(e.mapped for e in f.edges_to(t)),
            is_reference_task=(t in reference_tasks),
            is_root_task=(t in root_tasks),
            is_terminal_task=(t in terminal_tasks),
        )

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {"version_group_id": {"_eq": version_group_id}}
    version = (await models.Flow.where(version_where).max({"version"}))["version"] or 0

    # precompute task ids to make edges easy to add to database
    task_ids = {t.slug: str(uuid.uuid4()) for t in f.tasks}

    flow_id = await models.Flow(
        name=f.name,
        serialized_flow=serialized_flow,
        environment=serialized_flow.get("environment"),
        core_version=serialized_flow.get("environment", {}).get("__version__"),
        storage=serialized_flow.get("storage"),
        parameters=serialized_flow.get("parameters"),
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        description=description,
        settings={"heartbeat_enabled": True},
        schedules=[
            models.Schedule(
                schedule=serialized_flow.get("schedule"),
                active=set_schedule_active,
                schedule_start=f.schedule.start_date,
                schedule_end=f.schedule.end_date,
            )
        ]
        if f.schedule
        else [],
        tasks=[
            models.Task(
                id=task_ids[t.slug],
                name=t.name,
                slug=t.slug,
                type=task_info[t.slug]["type"],
                max_retries=t.max_retries,
                tags=list(t.tags),
                retry_delay=t.retry_delay,
                trigger=task_info[t.slug]["trigger"]["fn"],
                mapped=task_info[t.slug]["mapped"],
                auto_generated=getattr(t, "auto_generated", False),
                cache_key=t.cache_key,
                is_reference_task=task_info[t.slug]["is_reference_task"],
                is_root_task=task_info[t.slug]["is_root_task"],
                is_terminal_task=task_info[t.slug]["is_terminal_task"],
            )
            for t in f.tasks
        ],
        edges=[
            models.Edge(
                upstream_task_id=task_ids[e.upstream_task.slug],
                downstream_task_id=task_ids[e.downstream_task.slug],
                key=e.key,
                mapped=e.mapped,
            )
            for e in f.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active and f.schedule:
        schedule = await models.Schedule.where({"flow_id": {"_eq": flow_id}}).first({"id"})
        await api.schedules.schedule_flow_runs(schedule_id=schedule.id)

    return flow_id
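
# ---------------------------------------------------------------------------
# Usage sketch for the function above (illustrative only, not part of the
# original module). It shows how a Prefect Core flow might be serialized into
# the dict payload that create_flow deserializes with FlowSchema().load(...).
# It assumes the module's own imports and a configured database; the example
# flow, task, and the commented-out event-loop call are hypothetical.
from prefect import Flow, task


@task
def say_hello():
    print("hello")


with Flow("example-flow") as example_flow:
    say_hello()


async def _create_example_flow() -> str:
    # Flow.serialize() produces the serialized_flow dictionary this API expects
    return await create_flow(
        serialized_flow=example_flow.serialize(),
        set_schedule_active=False,
        description="created from a usage sketch",
    )

# To run against a live database, drive the coroutine with an event loop,
# e.g. asyncio.run(_create_example_flow())
# ---------------------------------------------------------------------------
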
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    Args:
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - project_id (str): A project id
        - version_group_id (str): A version group to add the Flow to
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
    """
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes -- this is our cutoff
    core_version = flow.environment.get("__version__", None)
    if core_version and module_version.parse(core_version) < module_version.parse(
        config.core_version_cutoff
    ):
        raise ValueError(
            "Prefect Server requires new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}."
        )

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # check required parameters - can't load a flow that has required params and a schedule
    # NOTE: if we allow schedules to be set via UI in the future, we might skip or
    # refactor this check
    required_parameters = [p for p in flow.parameters if p.required]
    if flow.schedule is not None and required_parameters:
        required_names = {p.name for p in required_parameters}
        if not all(
            required_names <= set(c.parameter_defaults) for c in flow.schedule.clocks
        ):
            raise ValueError("Can not schedule a flow that has required parameters.")

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    reference_tasks = set(flow.reference_tasks) or {
        t.slug for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = any(e.mapped for e in flow.edges if e.downstream_task == t.slug)
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {"_eq": version_group_id},
        "tenant_id": {"_eq": tenant_id},
    }

    # set up a flow group if it's not already in the system
    flow_group = await models.FlowGroup.where(
        {
            "_and": [
                {"tenant_id": {"_eq": tenant_id}},
                {"name": {"_eq": version_group_id}},
            ]
        }
    ).first({"id", "schedule"})

    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

    version = (await models.Flow.where(version_where).max({"version"}))["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # insert the flow along with its tasks and edges; edges reference task ids
    # through the task_lookup built above
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        core_version=flow.environment.get("__version__"),
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        is_schedule_active=set_schedule_active,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            )
            for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            )
            for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        await schedule_flow_runs(flow_id=flow_id)

    return flow_id
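
# ---------------------------------------------------------------------------
# Usage sketch for the project-scoped version above (illustrative only, not
# part of the original module). The project id argument is a placeholder for
# an existing project; the example flow and task are hypothetical. A ValueError
# is raised for an unknown project, a core version older than
# config.core_version_cutoff, or a schedule on a flow with required parameters.
from prefect import Flow, task


@task
def extract():
    return [1, 2, 3]


with Flow("etl-example") as etl_flow:
    extract()


async def _register_in_project(project_id: str) -> str:
    return await create_flow(
        serialized_flow=etl_flow.serialize(),
        project_id=project_id,  # placeholder: must reference a real project
        set_schedule_active=True,
    )

# To run against a live database, drive the coroutine with an event loop,
# e.g. asyncio.run(_register_in_project("<project-id>"))
# ---------------------------------------------------------------------------
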