@classmethod
def deserialize_dag(cls, encoded_dag: Dict[str, Any]) -> 'SerializedDAG':
    """Deserializes a DAG from a JSON object."""
    dag = SerializedDAG(dag_id=encoded_dag['_dag_id'])

    for k, v in encoded_dag.items():
        if k == "_downstream_task_ids":
            v = set(v)
        elif k == "tasks":
            SerializedBaseOperator._load_operator_extra_links = cls._load_operator_extra_links
            v = {task["task_id"]: SerializedBaseOperator.deserialize_operator(task) for task in v}
            k = "task_dict"
        elif k == "timezone":
            v = cls._deserialize_timezone(v)
        elif k == "dagrun_timeout":
            v = cls._deserialize_timedelta(v)
        elif k.endswith("_date"):
            v = cls._deserialize_datetime(v)
        elif k == "edge_info":
            # Value structure matches exactly
            pass
        elif k == "timetable":
            v = _decode_timetable(v)
        elif k in cls._decorated_fields:
            v = cls._deserialize(v)
        elif k == "params":
            v = cls._deserialize_params_dict(v)
        # else use v as it is

        setattr(dag, k, v)

    # A DAG is always serialized with only one of schedule_interval and
    # timetable. This back-populates the other to ensure the two attributes
    # line up correctly on the DAG instance.
    if "timetable" in encoded_dag:
        dag.schedule_interval = dag.timetable.summary
    else:
        dag.timetable = create_timetable(dag.schedule_interval, dag.timezone)

    # Set _task_group
    if "_task_group" in encoded_dag:
        dag._task_group = SerializedTaskGroup.deserialize_task_group(  # type: ignore
            encoded_dag["_task_group"], None, dag.task_dict
        )
    else:
        # This must be old data that had no task_group. Create a root TaskGroup and add
        # all tasks to it.
        dag._task_group = TaskGroup.create_root(dag)
        for task in dag.tasks:
            dag.task_group.add(task)

    # Set has_on_*_callback to True if it exists in the serialized blob, since False is the default
    if "has_on_success_callback" in encoded_dag:
        dag.has_on_success_callback = True
    if "has_on_failure_callback" in encoded_dag:
        dag.has_on_failure_callback = True

    keys_to_set_none = dag.get_serialized_fields() - encoded_dag.keys() - cls._CONSTRUCTOR_PARAMS.keys()
    for k in keys_to_set_none:
        setattr(dag, k, None)

    for task in dag.task_dict.values():
        task.dag = dag
        serializable_task: BaseOperator = task

        for date_attr in ["start_date", "end_date"]:
            if getattr(serializable_task, date_attr) is None:
                setattr(serializable_task, date_attr, getattr(dag, date_attr))

        if serializable_task.subdag is not None:
            setattr(serializable_task.subdag, 'parent_dag', dag)
            serializable_task.subdag.is_subdag = True

        for task_id in serializable_task.downstream_task_ids:
            # Bypass set_upstream etc here - it does more than we want
            dag.task_dict[task_id]._upstream_task_ids.add(serializable_task.task_id)

    return dag
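# Illustrative shape of the encoded_dag payload consumed above. This is a
# hedged sketch for orientation only, not the exact serialization schema;
# field names, value encodings, and nesting (e.g. the __type/__var wrapping of
# nested objects) vary by Airflow version.
#
#     {
#         "_dag_id": "example",
#         "fileloc": "/dags/example.py",
#         "timezone": "UTC",
#         "start_date": 1609459200.0,          # dates encoded as POSIX timestamps
#         "timetable": {"__type": "...", "__var": {...}},
#         "tasks": [{"task_id": "a", ...}, {"task_id": "b", ...}],
#         "_task_group": {...},
#     }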
@classmethod
def deserialize_dag(cls, encoded_dag: Dict[str, Any]) -> 'SerializedDAG':
    """Deserializes a DAG from a JSON object."""
    dag = SerializedDAG(dag_id=encoded_dag['_dag_id'])

    for k, v in encoded_dag.items():
        if k == "_downstream_task_ids":
            v = set(v)
        elif k == "tasks":
            # pylint: disable=protected-access
            SerializedBaseOperator._load_operator_extra_links = cls._load_operator_extra_links
            # pylint: enable=protected-access
            v = {task["task_id"]: SerializedBaseOperator.deserialize_operator(task) for task in v}
            k = "task_dict"
        elif k == "timezone":
            v = cls._deserialize_timezone(v)
        elif k in {"dagrun_timeout"}:
            v = cls._deserialize_timedelta(v)
        elif k.endswith("_date"):
            v = cls._deserialize_datetime(v)
        elif k in cls._decorated_fields:
            v = cls._deserialize(v)
        # else use v as it is

        setattr(dag, k, v)

    # Set _task_group
    # pylint: disable=protected-access
    if "_task_group" in encoded_dag:
        dag._task_group = SerializedTaskGroup.deserialize_task_group(  # type: ignore
            encoded_dag["_task_group"], None, dag.task_dict
        )
    else:
        # This must be old data that had no task_group. Create a root TaskGroup and add
        # all tasks to it.
        dag._task_group = TaskGroup.create_root(dag)
        for task in dag.tasks:
            dag.task_group.add(task)
    # pylint: enable=protected-access

    # Set has_on_*_callback to True if it exists in the serialized blob, since False is the default
    if "has_on_success_callback" in encoded_dag:
        dag.has_on_success_callback = True
    if "has_on_failure_callback" in encoded_dag:
        dag.has_on_failure_callback = True

    keys_to_set_none = dag.get_serialized_fields() - encoded_dag.keys() - cls._CONSTRUCTOR_PARAMS.keys()
    for k in keys_to_set_none:
        setattr(dag, k, None)

    setattr(dag, 'full_filepath', dag.fileloc)

    for task in dag.task_dict.values():
        task.dag = dag
        serializable_task: BaseOperator = task

        for date_attr in ["start_date", "end_date"]:
            if getattr(serializable_task, date_attr) is None:
                setattr(serializable_task, date_attr, getattr(dag, date_attr))

        if serializable_task.subdag is not None:
            setattr(serializable_task.subdag, 'parent_dag', dag)
            serializable_task.subdag.is_subdag = True

        for task_id in serializable_task.downstream_task_ids:
            # Bypass set_upstream etc here - it does more than we want
            # noqa: E501 # pylint: disable=protected-access
            dag.task_dict[task_id]._upstream_task_ids.add(serializable_task.task_id)

    return dag
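# Usage sketch: round-tripping a DAG through the serialized representation.
# This assumes the Airflow 2.x SerializedDAG API, where to_dict(dag) wraps the
# payload as {"__version": ..., "dag": {...}} and deserialize_dag consumes the
# inner "dag" dict. The example DAG ("example_sketch") and its tasks are
# hypothetical, for illustration only.

from datetime import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.serialization.serialized_objects import SerializedDAG

with DAG(dag_id="example_sketch", start_date=datetime(2021, 1, 1)) as example_dag:
    DummyOperator(task_id="a") >> DummyOperator(task_id="b")

encoded = SerializedDAG.to_dict(example_dag)  # JSON-serializable blob
restored = SerializedDAG.deserialize_dag(encoded["dag"])

# Upstream links are rebuilt from each task's _downstream_task_ids above.
assert restored.task_dict["b"].upstream_task_ids == {"a"}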