def test_on_parse_finish_decision_should_not_remove_end_node(self):
        """
        When an end node is reachable from a DecisionMapper node,
        on_parse_finish must keep the end node (a decision may branch to it)
        and drop only the relation coming from the non-decision node.
        """
        workflow = Workflow(input_directory_path=None,
                            output_directory_path=None,
                            dag_name=None)

        mapper = self._get_end_mapper("end_task")

        # first_task is a decision node, second_task is a plain node; both
        # point at the end node.
        workflow.nodes["first_task"] = ParsedNode(
            mock.Mock(spec=DecisionMapper, last_task_id="first_task"))
        workflow.nodes["second_task"] = ParsedNode(
            mock.Mock(spec=BaseMapper, last_task_id="second_task"))
        workflow.nodes["end_task"] = ParsedNode(mapper)

        workflow.relations = {
            Relation(from_task_id="first_task", to_task_id="end_task"),
            Relation(from_task_id="second_task", to_task_id="end_task"),
        }

        mapper.on_parse_finish(workflow)

        # All nodes survive; only the decision's relation to the end node remains.
        self.assertEqual(set(workflow.nodes.keys()),
                         {"first_task", "second_task", "end_task"})
        self.assertEqual(
            workflow.relations,
            {Relation(from_task_id="first_task", to_task_id="end_task")})
# Example 2
class SubWorkflowTemplateTestCase(BaseTestCases.BaseTemplateTestCase):
    """Smoke test: subworkflow.tpl renders to valid Python with default params."""

    TEMPLATE_NAME = "subworkflow.tpl"

    # Parameter set handed verbatim to render_template().
    DEFAULT_TEMPLATE_PARAMS = {
        "dependencies": {"import awesome_stuff"},
        "task_groups": [
            TaskGroup(
                name="AAA",
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task",
                             to_task_id="second_task")
                ],
            )
        ],
        "job_properties": {"user.name": "USER"},
        "config": {"key": "value"},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
    }

    def test_green_path(self):
        """Rendering with the default params must yield syntactically valid Python."""
        rendered = render_template(self.TEMPLATE_NAME,
                                   **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
# Example 3
class SubWorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Smoke test: subworkflow.tpl renders valid Python for a node-based param set."""

    TEMPLATE_NAME = "subworkflow.tpl"

    # Parameter set handed verbatim to render_template(); this variant feeds
    # ParsedActionNode objects (with a mocked mapper) rather than TaskGroups.
    DEFAULT_TEMPLATE_PARAMS = {
        "dependencies": {"import awesome_stuff"},
        "nodes": [
            ParsedActionNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task",
                             to_task_id="second_task")
                ],
            )
        ],
        "job_properties": {"user.name": "USER"},
        "config": {"key": "value"},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
    }

    def test_green_path(self):
        """Rendering with the default params must yield syntactically valid Python."""
        rendered = render_template(self.TEMPLATE_NAME,
                                   **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
class SubWorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Smoke test: subworkflow.tpl renders valid Python for a ParsedNode param set."""

    TEMPLATE_NAME = "subworkflow.tpl"

    # Parameters handed verbatim to render_template(); this variant uses
    # `params`/`schedule_interval`/`start_days_ago` instead of
    # `job_properties`/`config` as in the other SubWorkflowTemplateTestCase
    # variants in this file.
    DEFAULT_TEMPLATE_PARAMS = dict(
        dependencies=["import awesome_stuff"],
        nodes=[
            ParsedNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task",
                             to_task_id="second_task")
                ],
            )
        ],
        params={"user.name": "USER"},
        relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
        schedule_interval=None,
        start_days_ago=None,
    )

    def test_green_path(self):
        """Rendering with the default params must yield syntactically valid Python."""
        res = render_template(self.TEMPLATE_NAME,
                              **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(res)
    def test_write_dag_file(self, render_template_mock):
        """
        render_workflow() should call render_template once with the workflow's
        flattened attributes and return the rendered text.
        """
        relations = {Relation(from_task_id="TASK_1", to_task_id="TASK_2")}
        nodes = dict(TASK_1=ParsedActionNode(DummyMapper(Element("dummy"), name="TASK_1")))
        dependencies = {"import awesome_stuff"}
        # NOTE(review): output_directory_path reuses the input directory
        # string — confirm this is intentional test data.
        workflow = Workflow(
            input_directory_path="/tmp/input_directory",
            output_directory_path="/tmp/input_directory",
            dag_name="test_dag",
            relations=relations,
            nodes=nodes,
            dependencies=dependencies,
        )

        content = self.converter.render_workflow(workflow=workflow)

        # The template must receive nodes as a list, not the dict.
        render_template_mock.assert_called_once_with(
            dag_name="test_dag",
            dependencies={"import awesome_stuff"},
            nodes=[nodes["TASK_1"]],
            params={"user.name": "USER"},
            relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
            schedule_interval=None,
            start_days_ago=None,
            template_name="workflow.tpl",
        )

        self.assertEqual(content, "TEXT_CONTENT")
# Example 6
    def add_state_handler_if_needed(self):
        """
        Add additional tasks and relations to handle error and ok flow.

        With only an error path specified, a single error-handler task is
        appended, fed by every task in the group. When ok downstreams exist
        as well, an ok-handler task is also appended after the last task.
        With no error path, nothing is changed.
        """
        if not self.error_downstream_name:
            return

        # Catch-all error task: fires when any task in the group fails.
        error_task_id = self.name + "_error"
        self.error_handler_task = Task(
            task_id=error_task_id, template_name="dummy.tpl", trigger_rule=TriggerRule.ONE_FAILED
        )
        self.relations.extend(
            Relation(from_task_id=task.task_id, to_task_id=error_task_id, is_error=True)
            for task in self.tasks
        )

        if not self.downstream_names:
            return

        # Ok task: fires when the final task of the group succeeds.
        ok_task_id = self.name + "_ok"
        self.ok_handler_task = Task(
            task_id=ok_task_id, template_name="dummy.tpl", trigger_rule=TriggerRule.ONE_SUCCESS
        )
        self.relations.append(Relation(from_task_id=self.tasks[-1].task_id, to_task_id=ok_task_id))
# Example 7
    def test_on_parse_finish(self):
        """
        on_parse_finish of a kill mapper should remove its own (error) node
        and the relations leading to it, leaving ok-flow nodes untouched.
        """
        workflow = Workflow(input_directory_path=None,
                            output_directory_path=None,
                            dag_name=None)

        mapper = self._get_kill_mapper(name="fail_task")

        # NOTE(review): `autospec` is not a Mock() keyword — it merely sets a
        # .autospec attribute; `spec=BaseMapper` was probably intended. Confirm.
        workflow.nodes["task"] = ParsedNode(mock.Mock(autospec=BaseMapper))
        workflow.nodes["fail_task"] = ParsedNode(mapper)
        workflow.nodes["success_task"] = ParsedNode(
            mock.Mock(autospec=BaseMapper))
        workflow.nodes["success_task"].set_is_ok(True)
        workflow.nodes["fail_task"].set_is_error(True)

        workflow.relations = {
            Relation(from_task_id="task", to_task_id="fail_task"),
            Relation(from_task_id="task", to_task_id="success_task"),
        }

        mapper.on_parse_finish(workflow)

        # The error node and its incoming relation are gone.
        self.assertEqual(set(workflow.nodes.keys()), {"task", "success_task"})
        self.assertEqual(
            workflow.relations,
            {Relation(from_task_id="task", to_task_id="success_task")})
# Example 8
 def test_multiple(self):
     """chain() should link each task to its successor, preserving order."""
     tasks = [
         Task(task_id="task_{}".format(index), template_name="")
         for index in range(1, 5)
     ]
     relations = fs_mapper.chain(tasks)
     expected = [
         Relation(from_task_id="task_{}".format(index),
                  to_task_id="task_{}".format(index + 1))
         for index in range(1, 4)
     ]
     self.assertEqual(expected, relations)
# Example 9
    def test_to_tasks_and_relations_should_parse_prepare_element(self):
        """
        A hive node containing a <prepare> element should map to two tasks:
        a prepare task (delete/mkdir params) chained before the main task.
        """
        self.hive_node.append(ET.fromstring(FRAGMENT_QUERY))
        self.hive_node.append(ET.fromstring(FRAGMENT_PREPARE))

        mapper = self._get_hive_mapper(job_properties=self.job_properties,
                                       config=self.config)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(2, len(tasks))
        # The prepare task comes first and carries the resolved paths.
        self.assertEqual(
            Task(
                task_id="test_id_prepare",
                template_name="prepare.tpl",
                template_params={
                    "delete":
                    "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/pig/output",
                    "mkdir":
                    "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/pig/created-folder",
                },
            ),
            tasks[0],
        )

        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
# Example 10
    def test_task_and_relations(self):
        """
        The distcp mapper should emit a prepare task followed by the distcp
        task, linked by a single relation, with the expected template params.
        """
        # Given
        mapper = _get_distcp_mapper(self.distcp_node,
                                    job_properties=EXAMPLE_JOB_PROPERTIES,
                                    config=EXAMPLE_CONFIG_PROPERTIES)

        # When
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        # Then
        self.assertEqual(mapper.oozie_node, self.distcp_node)
        self.assertIsNotNone(tasks)
        self.assertIsNotNone(relations)
        self.assertEqual(2, len(tasks))
        self.assertEqual(1, len(relations))
        self.assertEqual(
            [
                Task(
                    task_id="distcp_prepare",
                    template_name="prepare.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "delete": "/tmp/d_path",
                        "mkdir": None
                    },
                ),
                Task(
                    task_id="distcp",
                    template_name="distcp.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={
                                "nameNode1": "hdfs://localhost:8081",
                                "nameNode2": "hdfs://localhost:8082",
                            },
                            action_node_properties={
                                "oozie.launcher.mapreduce.job.hdfs-servers":
                                "{{nameNode1}} ,{{nameNode2}}"
                            },
                        ),
                        "distcp_command":
                        "--class=org.apache.hadoop.tools.DistCp -- -update -skipcrccheck "
                        "-strategy dynamic '{{nameNode1}}/path/to/input file.txt' "
                        "'{{nameNode2}}/path/to/output-file.txt'",
                    },
                ),
            ],
            tasks,
        )
        # The prepare task is wired directly before the distcp task.
        self.assertEqual([
            Relation(from_task_id=f"{mapper.name}_prepare",
                     to_task_id=mapper.name)
        ], relations)
 def test_convert(self, sort_imports_mock, autoflake_fix_file_mock, black_mock, parse_workflow_mock):
     """
     convert() should parse the workflow once and then post-process the
     generated DAG file with black, autoflake and isort (in that pipeline).
     """
     # Given
     workflow = Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="AAA", to_task_id="BBB")},
         nodes=dict(AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))),
         dependencies={"import AAAA"},
     )
     parse_workflow_mock.return_value = workflow
     # When
     self.converter.convert()
     # Then
     parse_workflow_mock.assert_called_once_with()
     # black formats the emitted file in place.
     black_mock.format_file_in_place.assert_called_once_with(
         Path("/tmp/test_dag.py"), fast=mock.ANY, mode=mock.ANY, write_back=mock.ANY
     )
     # autoflake strips unused imports/variables from the generated code.
     autoflake_fix_file_mock.assert_called_once_with(
         "/tmp/test_dag.py",
         args=AutoflakeArgs(
             remove_all_unused_imports=True,
             ignore_init_module_imports=False,
             remove_duplicate_keys=False,
             remove_unused_variables=True,
             in_place=True,
             imports=None,
             expand_star_imports=False,
             check=False,
         ),
         standard_out=sys.stdout,
     )
     sort_imports_mock.assert_called_once_with("/tmp/test_dag.py")
    def test_convert_nodes(self):
        """
        convert_nodes() should attach each mapper's (tasks, relations) pair
        to its corresponding node, by identity.
        """
        tasks_1 = [
            Task(task_id="first_task", template_name="dummy.tpl"),
            Task(task_id="second_task", template_name="dummy.tpl"),
        ]
        relations_1 = {
            Relation(from_task_id="first_task", to_task_id="tasks_2")
        }
        tasks_2 = [Task(task_id="third_task", template_name="dummy.tpl")]
        # BUG FIX: `{}` is an empty *dict*; relations are modelled as sets
        # everywhere else in these tests, so use an empty set.
        relations_2 = set()

        # Each mocked mapper returns its fixed (tasks, relations) pair.
        mapper_1 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_1, relations_1)})
        mapper_2 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_2, relations_2)})

        node_1 = ParsedNode(mapper=mapper_1)
        node_2 = ParsedNode(mapper=mapper_2)
        nodes = dict(TASK_1=node_1, TASK_2=node_2)

        self.converter.convert_nodes(nodes=nodes)
        # The exact objects produced by the mappers must be attached (assertIs).
        self.assertIs(node_1.tasks, tasks_1)
        self.assertIs(node_2.tasks, tasks_2)
        self.assertIs(node_1.relations, relations_1)
        self.assertIs(node_2.relations, relations_2)
    def test_to_tasks_and_relations(self):
        """
        The shell mapper should emit a prepare task chained before the shell
        task, with delete/mkdir paths and the shell command as params.
        """
        job_properties = {"nameNode": "hdfs://localhost:9020/", "queueName": "default"}
        config = {"dataproc_cluster": "my-cluster", "gcp_region": "europe-west3"}
        mapper = self._get_shell_mapper(job_properties=job_properties, config=config)
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "delete": "//examples/output-data/demo/pig-node "
                        "//examples/output-data/demo/pig-node2",
                        "mkdir": "//examples/input-data/demo/pig-node "
                        "//examples/input-data/demo/pig-node2",
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="shell.tpl",
                    template_params={
                        "pig_command": "sh echo arg1 arg2",
                        "action_node_properties": {
                            "mapred.job.queue.name": "default",
                            "mapred.map.output.compress": "false",
                        },
                    },
                ),
            ],
            tasks,
        )
        self.assertEqual(relations, [Relation(from_task_id="test_id_prepare", to_task_id="test_id")])
# Example 14
 def to_tasks_and_relations(self):
     """Build the prepare + pig task pair and the relation chaining them."""
     prepare_command = self.get_prepare_command(self.oozie_node, self.params)

     # Prepare always runs first for this mapper.
     prepare_task = Task(
         task_id=self.name + "_prepare",
         template_name="prepare.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(prepare_command=prepare_command),
     )
     pig_task = Task(
         task_id=self.name,
         template_name="pig.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             properties=self.properties,
             params_dict=self.params_dict,
             script_file_name=self.script_file_name,
         ),
     )

     relations = [
         Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)
     ]
     return [prepare_task, pig_task], relations
 def convert_relations(workflow: Workflow) -> None:
     """Populate workflow.relations from each node's ok and error downstreams."""
     logging.info("Converting relations between nodes.")
     for node in workflow.nodes.values():
         # Ordinary (ok-flow) edges: node tail -> downstream head.
         for downstream_name in node.get_downstreams():
             workflow.relations.add(
                 Relation(
                     from_task_id=node.last_task_id,
                     to_task_id=workflow.nodes[downstream_name].first_task_id,
                 )
             )
         # Optional error edge, flagged so templates can render it differently.
         error_name = node.get_error_downstream_name()
         if error_name:
             workflow.relations.add(
                 Relation(
                     from_task_id=node.last_task_id,
                     to_task_id=workflow.nodes[error_name].first_task_id,
                     is_error=True,
                 )
             )
# Example 16
 def to_tasks_and_relations(self):
     """Return the mapreduce task — prefixed by a prepare task when the node has one."""
     mapreduce_task = Task(
         task_id=self.name,
         template_name="mapreduce.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             properties=self.properties,
             params_dict=self.params_dict,
             hdfs_files=self.hdfs_files,
             hdfs_archives=self.hdfs_archives,
         ),
     )
     # Without a <prepare> element the mapreduce task stands alone.
     if not self.has_prepare(self.oozie_node):
         return [mapreduce_task], []

     prepare_task = Task(
         task_id=self.name + "_prepare",
         template_name="prepare.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             prepare_command=self.get_prepare_command(self.oozie_node, self.params)
         ),
     )
     return (
         [prepare_task, mapreduce_task],
         [Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)],
     )
class WorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Smoke test: workflow.tpl renders to valid Python with default params."""

    TEMPLATE_NAME = "workflow.tpl"

    # Parameters handed verbatim to render_template().
    DEFAULT_TEMPLATE_PARAMS = dict(
        dag_name="test_dag",
        dependencies={"import awesome_stuff"},
        task_groups=[
            TaskGroup(
                name="TASK_GROUP",
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
            )
        ],
        job_properties={"user.name": "USER"},
        config={},
        relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
        schedule_interval=3,
        start_days_ago=3,
        task_map={"oozie-task": ["airflow-task"]},
    )

    def test_green_path(self):
        """Rendering with the default params must yield syntactically valid Python."""
        res = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(res)
    def test_to_tasks_and_relations(self):
        """
        The shell mapper (params variant) should emit a prepare task running
        prepare.sh with the cluster/region/delete/mkdir flags, chained before
        the shell task.
        """
        params = {
            "dataproc_cluster": "my-cluster",
            "gcp_region": "europe-west3",
            "nameNode": "hdfs://localhost:9020/",
        }
        mapper = self._get_shell_mapper(params=params)
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "prepare_command":
                        "$DAGS_FOLDER/../data/prepare.sh -c my-cluster -r europe-west3 "
                        '-d "//examples/output-data/demo/pig-node //examples/output-data'
                        '/demo/pig-node2" -m "//examples/input-data/demo/pig-node '
                        '//examples/input-data/demo/pig-node2"'
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="shell.tpl",
                    template_params={"pig_command": "sh 'echo arg1 arg2'"},
                ),
            ],
        )
        self.assertEqual(
            relations,
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")])
# Example 19
 def to_tasks_and_relations(self):
     """Return the git task — prefixed by a prepare task when the node has one."""
     git_task = Task(
         task_id=self.name,
         template_name="git.tpl",
         template_params=dict(bash_command=self.bash_command),
     )
     # Without a <prepare> element the git task stands alone.
     if not self.has_prepare(self.oozie_node):
         return [git_task], []

     prepare_command = self.get_prepare_command(self.oozie_node, self.params)
     prepare_task = Task(
         task_id=self.name + "_prepare",
         template_name="prepare.tpl",
         template_params=dict(prepare_command=prepare_command),
     )
     return (
         [prepare_task, git_task],
         [Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)],
     )
    def test_to_tasks_and_relations(self):
        """
        The pig mapper should emit a prepare task (delete/mkdir params)
        chained before the pig task, whose params carry the PropertySet,
        script params and script file name.
        """
        job_properties = {"nameNode": "hdfs://"}
        config = {
            "dataproc_cluster": "my-cluster",
            "gcp_region": "europe-west3"
        }
        mapper = self._get_pig_mapper(job_properties=job_properties,
                                      config=config)
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "delete":
                        "/examples/output-data/demo/pig-node /examples/output-data/demo/pig-node2",
                        "mkdir":
                        "/examples/input-data/demo/pig-node /examples/input-data/demo/pig-node2",
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="pig.tpl",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={"nameNode": "hdfs://"},
                            action_node_properties={
                                "mapred.job.queue.name": "${queueName}",
                                "mapred.map.output.compress": "false",
                            },
                        ),
                        "params_dict": {
                            "INPUT":
                            "/user/${wf:user()}/${examplesRoot}/input-data/text",
                            "OUTPUT":
                            "/user/${wf:user()}/${examplesRoot}/output-data/demo/pig-node",
                        },
                        "script_file_name":
                        "id.pig",
                        "action_node_properties": {
                            "mapred.job.queue.name": "${queueName}",
                            "mapred.map.output.compress": "false",
                        },
                    },
                ),
            ],
        )
        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
# Example 21
    def _get_relations(self):
        """
        Relations produced by this mapper.

        A prepare->main relation when the node has a <prepare> element,
        otherwise no relations at all.

        :return: list of relations
        """
        if not self.has_prepare(self.oozie_node):
            return []
        return [
            Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)
        ]
# Example 22
 def convert_relations(self) -> None:
     """Link every task group to its ok and error downstream groups."""
     logging.info("Converting relations between tasks groups.")
     groups = self.workflow.task_groups
     for task_group in groups.values():
         # Ok-flow edges: end of this group's ok flow -> downstream group head.
         for downstream_name in task_group.downstream_names:
             self.workflow.task_group_relations.add(
                 Relation(
                     from_task_id=task_group.last_task_id_of_ok_flow,
                     to_task_id=groups[downstream_name].first_task_id,
                 )
             )
         # Optional error edge, flagged so templates can render it differently.
         error_name = task_group.error_downstream_name
         if error_name:
             self.workflow.task_group_relations.add(
                 Relation(
                     from_task_id=task_group.last_task_id_of_error_flow,
                     to_task_id=groups[error_name].first_task_id,
                     is_error=True,
                 )
             )
# Example 23
    def test_convert_to_text_with_prepare_node(self):
        """
        A git node with a prepare element should map to a prepare task plus
        the git task, chained by one relation.
        """
        git_node = ET.fromstring(EXAMPLE_XML)

        mapper = self._get_git_mapper(git_node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "delete": "/tmp/d_path",
                        "mkdir": "/tmp/mk_path"
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="git.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "git_uri":
                        "https://github.com/apache/oozie",
                        "git_branch":
                        "{{branch}}",
                        "destination_path":
                        "/my_git_repo_directory",
                        "key_path":
                        "/awesome-key/",
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={
                                "branch": "my-awesome-branch",
                                "nameNode": "hdfs://",
                                "userName": "******",
                                "examplesRoot": "examples",
                            },
                            action_node_properties={},
                        ),
                    },
                ),
            ],
            tasks,
        )

        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
# Example 24
    def test_prepend_task_empty_relations(self):
        """
        prepend_task with no existing relations should put the new task first
        and create a single relation linking it to the old first task.
        """
        task_1 = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")
        task_2 = Task(task_id=TEST_MAPPER_NAME + "_2", template_name="pig.tpl")

        tasks, relations = ActionMapper.prepend_task(task_to_prepend=task_1,
                                                     tasks=[task_2],
                                                     relations=[])
        self.assertEqual([task_1, task_2], tasks)
        # The expected ids imply TEST_MAPPER_NAME == "mapper_name".
        self.assertEqual([
            Relation(from_task_id="mapper_name_1", to_task_id="mapper_name_2")
        ], relations)
# Example 25
    def create_relations(self) -> None:
        """
        Given a dictionary of task_ids and ParsedNodes,
        returns a set of logical connectives for each task in Airflow.

        :return: Set with strings of task's downstream nodes.
        """
        logging.info("Parsing relations between operators.")
        for p_node in self.workflow.nodes.values():
            # Ok-flow edges: node tail -> downstream node head.
            for downstream in p_node.get_downstreams():
                relation = Relation(
                    from_task_id=p_node.last_task_id, to_task_id=self.workflow.nodes[downstream].first_task_id
                )
                self.workflow.relations.add(relation)
            error_downstream = p_node.get_error_downstream_name()
            if error_downstream:
                # NOTE(review): unlike the convert_relations variants in this
                # file, this error relation is built WITHOUT is_error=True —
                # confirm whether the flag was dropped intentionally.
                relation = Relation(
                    from_task_id=p_node.last_task_id,
                    to_task_id=self.workflow.nodes[error_downstream].first_task_id,
                )
                self.workflow.relations.add(relation)
    def test_on_parse_finish_decision_should_not_remove_end_node(self):
        """
        When an end node is reachable from a DecisionMapper node,
        on_parse_finish must keep all nodes and retain only the relation
        coming from the decision node.
        """
        workflow = Workflow(input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B")

        mapper = self._get_end_mapper("end_task")

        # first_task is a decision node, second_task a plain node; both point
        # at the end node.
        workflow.nodes["first_task"] = ParsedActionNode(
            mock.Mock(spec=DecisionMapper), tasks=[self._get_dummy_task("first_task")]
        )
        workflow.nodes["second_task"] = ParsedActionNode(
            mock.Mock(spec=BaseMapper), tasks=[self._get_dummy_task("second_task")]
        )
        workflow.nodes["end_task"] = ParsedActionNode(mapper, tasks=[self._get_dummy_task("end_task")])

        workflow.relations = {
            Relation(from_task_id="first_task", to_task_id="end_task"),
            Relation(from_task_id="second_task", to_task_id="end_task"),
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual({"first_task", "second_task", "end_task"}, set(workflow.nodes.keys()))
        self.assertEqual({Relation(from_task_id="first_task", to_task_id="end_task")}, workflow.relations)
# Example 27
 def _create_workflow(nodes=None):
     """Build a minimal Workflow fixture; `nodes` (when truthy) replaces the default node map."""
     if nodes:
         node_map = nodes
     else:
         # Default: a single ParsedActionNode wrapping a DummyMapper.
         node_map = dict(
             AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="DAG_NAME_A", dag_name="DAG_NAME_B"))
         )
     return Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="DAG_NAME_A", to_task_id="DAG_NAME_B")},
         nodes=node_map,
         dependencies={"import IMPORT"},
     )
# Example 28
def _create_workflow():
    """Build a fixture Workflow holding one single-task TaskGroup."""
    only_task = Task(task_id="task_name", template_name="dummy.tpl")
    group = TaskGroup(name="DAG_NAME_A", tasks=[only_task])
    return Workflow(
        dag_name="DAG_NAME",
        input_directory_path="/tmp/input",
        output_directory_path="/tmp/output",
        task_group_relations={
            Relation(from_task_id="DAG_NAME_A", to_task_id="DAG_NAME_B")
        },
        task_groups={"TASK_NAME": group},
        dependencies={"import IMPORT"},
    )
# Example 29
 def _create_workflow():
     """Build a fixture Workflow holding one OozieActionNode around a DummyMapper."""
     dummy_mapper = DummyMapper(
         Element("dummy"), name="DAG_NAME_A", dag_name="DAG_NAME_B"
     )
     return Workflow(
         dag_name="DAG_NAME",
         input_directory_path="/tmp/input",
         output_directory_path="/tmp/output",
         task_group_relations={
             Relation(from_task_id="DAG_NAME_A", to_task_id="DAG_NAME_B")
         },
         nodes={"AAA": OozieActionNode(dummy_mapper)},
         dependencies={"import IMPORT"},
     )
 def test_create_dag_file(self, open_mock, _):
     """create_dag_file() should open the converter's output path for writing."""
     # Given
     workflow = Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="AAA", to_task_id="BBB")},
         nodes=dict(AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))),
         dependencies={"import AAAA"},
     )
     # When
     self.converter.create_dag_file(workflow)
     # Then
     open_mock.assert_called_once_with("/tmp/test_dag.py", "w")