def test_deserialization_schedule_interval(
    self, serialized_schedule_interval, expected_schedule_interval, expected_n_schedule_interval
):
    """Test that schedule_interval is deserialized and normalized correctly."""
    serialized = {
        "__version": 1,
        "dag": {
            "default_args": {"__type": "dict", "__var": {}},
            "_dag_id": "simple_dag",
            "fileloc": __file__,
            "tasks": [],
            "timezone": "UTC",
            "schedule_interval": serialized_schedule_interval,
        },
    }

    SerializedDAG.validate_schema(serialized)
    dag = SerializedDAG.from_dict(serialized)

    self.assertEqual(dag.schedule_interval, expected_schedule_interval)
    self.assertEqual(dag.normalized_schedule_interval, expected_n_schedule_interval)
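# The method above takes per-case arguments, so it is presumably driven by a
# parameterization decorator on the test class. A minimal sketch of plausible
# cases; the values below are illustrative assumptions, not the project's
# actual fixtures.
from parameterized import parameterized

SCHEDULE_INTERVAL_CASES = [
    (None, None, None),                   # no schedule normalizes to None
    ("@once", "@once", None),             # "@once" also normalizes to None
    ("@weekly", "@weekly", "0 0 * * 0"),  # other presets normalize to cron strings
]
# Applied as, e.g.: @parameterized.expand(SCHEDULE_INTERVAL_CASES)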
def test_serialization(self):
    """Serialization and deserialization should work for every DAG and Operator."""
    dags = collect_dags()
    serialized_dags = {}
    for dag in dags.values():
        serialized = SerializedDAG.to_dict(dag)
        SerializedDAG.validate_schema(serialized)
        serialized_dags[dag.dag_id] = serialized

    # Compare against the JSON ground truth.
    self.validate_serialized_dag(serialized_dags['simple_dag'], serialized_simple_dag_ground_truth)
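# For reference, a minimal standalone round-trip through the same public API;
# the DAG here is a hypothetical stand-in, not one of the collected test DAGs.
from datetime import datetime

from airflow import DAG
from airflow.serialization.serialized_objects import SerializedDAG

with DAG("roundtrip_demo", start_date=datetime(2020, 1, 1)) as demo_dag:
    pass

payload = SerializedDAG.to_dict(demo_dag)    # JSON-serializable dict
SerializedDAG.validate_schema(payload)       # raises on schema violations
restored = SerializedDAG.from_dict(payload)  # rebuilds a DAG from the dict
assert restored.dag_id == demo_dag.dag_id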
def test_task_group_serialization(self):
    """Test TaskGroup serialization/deserialization."""
    from airflow.operators.dummy_operator import DummyOperator
    from airflow.utils.task_group import TaskGroup

    execution_date = datetime(2020, 1, 1)
    with DAG("test_task_group_serialization", start_date=execution_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234") as group234:
            _ = DummyOperator(task_id="task2")
            with TaskGroup("group34") as group34:
                _ = DummyOperator(task_id="task3")
                _ = DummyOperator(task_id="task4")
        task5 = DummyOperator(task_id="task5")

        task1 >> group234
        group34 >> task5

    dag_dict = SerializedDAG.to_dict(dag)
    SerializedDAG.validate_schema(dag_dict)
    json_dag = SerializedDAG.from_json(SerializedDAG.to_json(dag))
    self.validate_deserialized_dag(json_dag, dag)

    serialized_dag = SerializedDAG.deserialize_dag(SerializedDAG.serialize_dag(dag))

    assert serialized_dag.task_group.children
    assert serialized_dag.task_group.children.keys() == dag.task_group.children.keys()

    def check_task_group(node):
        try:
            children = node.children.values()
        except AttributeError:
            # Leaf (operator) node: round-trip serialization and check the result.
            expected_serialized = SerializedBaseOperator.serialize_operator(dag.get_task(node.task_id))
            expected_deserialized = SerializedBaseOperator.deserialize_operator(expected_serialized)
            expected_dict = SerializedBaseOperator.serialize_operator(expected_deserialized)
            assert node
            assert SerializedBaseOperator.serialize_operator(node) == expected_dict
            return

        for child in children:
            check_task_group(child)

    check_task_group(serialized_dag.task_group)
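# The recursive check above duck-types leaf detection: TaskGroups expose a
# `.children` mapping while operators do not, so the AttributeError branch is
# the operator case. For reference, the hierarchy the walk visits here is:
#
#   task_group (root)
#   ├── task1
#   ├── group234
#   │   ├── task2
#   │   └── group34
#   │       ├── task3
#   │       └── task4
#   └── task5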
def test_write_dag(self):
    """DAGs can be written into the database."""
    example_dags = self._write_example_dags()

    with create_session() as session:
        for dag in example_dags.values():
            assert SDM.has_dag(dag.dag_id)
            result = session.query(SDM.fileloc, SDM.data).filter(SDM.dag_id == dag.dag_id).one()
            assert result.fileloc == dag.full_filepath
            # Verify the stored blob against the JSON schema.
            SerializedDAG.validate_schema(result.data)
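# A plausible shape for the `_write_example_dags` helper used above (a method
# on the test class), sketched from the public SerializedDagModel API; this is
# an assumption, not necessarily the project's actual implementation.
from airflow.example_dags import example_bash_operator
from airflow.models.serialized_dag import SerializedDagModel as SDM

def _write_example_dags(self):
    example_dags = {example_bash_operator.dag.dag_id: example_bash_operator.dag}
    for dag in example_dags.values():
        SDM.write_dag(dag)  # upserts the serialized row for this DAG
    return example_dags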
def test_edge_info_serialization(self):
    """Tests edge_info serialization/deserialization."""
    from airflow.operators.dummy import DummyOperator
    from airflow.utils.edgemodifier import Label

    with DAG("test_edge_info_serialization", start_date=datetime(2020, 1, 1)) as dag:
        task1 = DummyOperator(task_id="task1")
        task2 = DummyOperator(task_id="task2")
        task1 >> Label("test label") >> task2  # pylint: disable=W0106

    dag_dict = SerializedDAG.to_dict(dag)
    SerializedDAG.validate_schema(dag_dict)
    json_dag = SerializedDAG.from_json(SerializedDAG.to_json(dag))
    self.validate_deserialized_dag(json_dag, dag)

    serialized_dag = SerializedDAG.deserialize_dag(SerializedDAG.serialize_dag(dag))

    assert serialized_dag.edge_info == dag.edge_info
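# For context, the label attached via `Label` can be read back through the
# public accessor `DAG.get_edge_info(upstream_task_id, downstream_task_id)`;
# a sketch of what follow-up assertions could look like:
assert dag.get_edge_info("task1", "task2") == {"label": "test label"}
assert serialized_dag.get_edge_info("task1", "task2") == {"label": "test label"}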