Beispiel #1
0
    def test_on_parse_finish(self):
        """The kill mapper's on_parse_finish drops the error node and its relation."""
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        mapper = self._get_kill_mapper(name="fail_task")

        nodes = workflow.nodes
        nodes["task"] = ParsedActionNode(mock.Mock(autospec=BaseMapper))
        nodes["fail_task"] = ParsedActionNode(mapper)
        nodes["success_task"] = ParsedActionNode(mock.Mock(autospec=BaseMapper))
        nodes["success_task"].is_ok = True
        nodes["fail_task"].is_error = True

        workflow.relations = {
            Relation(from_task_id="task", to_task_id="fail_task"),
            Relation(from_task_id="task", to_task_id="success_task"),
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual({"task", "success_task"}, set(workflow.nodes.keys()))
        self.assertEqual(
            {Relation(from_task_id="task", to_task_id="success_task")},
            workflow.relations,
        )
Beispiel #2
0
    def parse_start_node(self, start_node):
        """
        Parse the single start node of a workflow.

        The start node is the workflow job's entry point: once the job is
        started it immediately transitions to the node named in the node's
        ``to`` attribute. A workflow definition must have one start node.
        """
        # A short random suffix keeps the generated name unique; a clash is
        # theoretically possible but very unlikely.
        name = "start_node_" + str(uuid.uuid4())[:4]
        mapper = StartMapper(
            oozie_node=start_node,
            name=name,
            dag_name=self.workflow.dag_name,
            props=self.props,
            trigger_rule=TriggerRule.DUMMY,
        )

        node = ParsedActionNode(mapper)
        node.add_downstream_node_name(start_node.attrib["to"])

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Start Node.")
        self.workflow.nodes[name] = node
Beispiel #3
0
    def test_on_parse_finish_decision_should_not_remove_end_node(self):
        """An end node fed by a decision node must survive on_parse_finish."""
        workflow = Workflow(
            input_directory_path=None, output_directory_path=None, dag_name=None
        )

        mapper = self._get_end_mapper("end_task")

        decision_upstream = mock.Mock(spec=DecisionMapper, last_task_id="first_task")
        plain_upstream = mock.Mock(spec=BaseMapper, last_task_id="second_task")
        workflow.nodes["first_task"] = ParsedActionNode(decision_upstream)
        workflow.nodes["second_task"] = ParsedActionNode(plain_upstream)
        workflow.nodes["end_task"] = ParsedActionNode(mapper)

        workflow.relations = {
            Relation(from_task_id="first_task", to_task_id="end_task"),
            Relation(from_task_id="second_task", to_task_id="end_task"),
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual(
            {"first_task", "second_task", "end_task"}, set(workflow.nodes.keys())
        )
        self.assertEqual(
            {Relation(from_task_id="first_task", to_task_id="end_task")},
            workflow.relations,
        )
Beispiel #4
0
    def test_should_keep_node_in_correct_flow(self):
        """RemoveKillTransformer keeps a kill node reached through the ok flow."""
        transformer = RemoveKillTransformer()
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        ok_mapper = mock.Mock(spec=BaseMapper)
        ok_mapper.name = "first_task"
        kill_mapper = mock.Mock(spec=KillMapper)
        kill_mapper.name = "third_task"

        ok_node = ParsedActionNode(ok_mapper)
        kill_node = ParsedActionNode(kill_mapper)
        # The kill node is in the regular (downstream) flow, not the error flow.
        ok_node.downstream_names = [kill_mapper.name]

        workflow.nodes[ok_mapper.name] = ok_node
        workflow.nodes[kill_mapper.name] = kill_node

        transformer.process_workflow(workflow)

        self.assertEqual(set(workflow.nodes.keys()), {ok_mapper.name, kill_mapper.name})
        self.assertEqual(ok_node.downstream_names, [kill_node.name])
        self.assertEqual(kill_node.downstream_names, [])
Beispiel #5
0
    def test_convert_nodes(self):
        """
        convert_nodes() must attach each mapper's tasks/relations to its node.

        Every node's mapper is asked for (tasks, relations) and the results
        must be stored on the node objects unchanged (identity-checked below).
        """
        converter = self._create_converter()

        tasks_1 = [
            Task(task_id="first_task", template_name="dummy.tpl"),
            Task(task_id="second_task", template_name="dummy.tpl"),
        ]
        relations_1 = {
            Relation(from_task_id="first_task", to_task_id="tasks_2")
        }
        tasks_2 = [Task(task_id="third_task", template_name="dummy.tpl")]
        # FIX: `{}` is an empty dict, not an empty set; relations are modelled
        # as sets of Relation everywhere else in these tests.
        relations_2 = set()

        mapper_1 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_1, relations_1)})
        mapper_2 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_2, relations_2)})

        node_1 = ParsedActionNode(mapper=mapper_1)
        node_2 = ParsedActionNode(mapper=mapper_2)
        nodes = dict(TASK_1=node_1, TASK_2=node_2)
        converter.workflow.nodes = nodes

        converter.convert_nodes()

        # Identity (not just equality): the converter must store the very
        # objects the mappers returned.
        self.assertIs(node_1.tasks, tasks_1)
        self.assertIs(node_2.tasks, tasks_2)
        self.assertIs(node_1.relations, relations_1)
        self.assertIs(node_2.relations, relations_2)
Beispiel #6
0
    def test_should_keep_connected_nodes_in_error_state(self):
        """
        Graph before:

        .. graphviz::

           digraph foo {
              S -> A
           }

        Graph after:

        .. graphviz::

           digraph foo {
              S -> A
           }

        Where:
        A - first_task
        S - start_task
        """
        transformer = RemoveInaccessibleNodeTransformer()

        workflow = Workflow(input_directory_path="",
                            output_directory_path="",
                            dag_name="DAG_NAME_B")

        first_mapper = mock.Mock(spec=BaseMapper)
        first_mapper.name = "first_task"

        start_mapper = mock.Mock(spec=StartMapper)
        start_mapper.name = "start_task"

        first_node = ParsedActionNode(
            mapper=first_mapper,
            tasks=[self._get_dummy_task(first_mapper.name)])
        start_node = ParsedActionNode(
            mapper=start_mapper,
            tasks=[self._get_dummy_task(start_mapper.name)])

        # The start node references first_node only via its error transition;
        # that alone must keep first_node accessible.
        start_node.error_xml = first_node.name

        workflow.nodes[first_mapper.name] = first_node
        workflow.nodes[start_mapper.name] = start_node

        transformer.process_workflow(workflow)

        self.assertEqual({start_mapper.name, first_mapper.name},
                         set(workflow.nodes.keys()))
        self.assertEqual([], start_node.downstream_names)
        self.assertEqual([], first_node.downstream_names)
        self.assertEqual(first_mapper.name, start_node.error_xml)
        # FIX: assertIsNone is the idiomatic (and clearer-failing) form of
        # assertEqual(None, ...).
        self.assertIsNone(first_node.error_xml)
    def test_on_parse_finish_simple_should_remove_end_node(self):
        """A plain end node and its incoming relation are removed after parsing."""
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        mapper = self._get_end_mapper("second_task")

        workflow.nodes["first_task"] = ParsedActionNode(mock.Mock(autospec=BaseMapper))
        workflow.nodes["second_task"] = ParsedActionNode(mapper)
        workflow.relations = {
            Relation(from_task_id="first_task", to_task_id="second_task")
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual(set(workflow.nodes.keys()), {"first_task"})
        self.assertEqual(workflow.relations, set())
Beispiel #8
0
    def test_convert_dependencies(self):
        """Imports from every node's mapper are merged into workflow.dependencies."""
        converter = self._create_converter()

        mapper_a = mock.MagicMock(
            **{"required_imports.return_value": {"import A", "import B"}}
        )
        mapper_b = mock.MagicMock(
            **{"required_imports.return_value": ("import B", "import C")}
        )

        workflow = self._create_workflow(
            dict(
                TASK_1=ParsedActionNode(mapper=mapper_a),
                TASK_2=ParsedActionNode(mapper=mapper_b),
            )
        )
        converter.convert_dependencies(workflow)

        # "import IMPORT" comes from the fixture; duplicates collapse into a set.
        self.assertEqual(
            workflow.dependencies,
            {"import IMPORT", "import C", "import B", "import A"},
        )
 def test_convert(self, sort_imports_mock, autoflake_fix_file_mock, black_mock, parse_workflow_mock):
     """End-to-end convert(): parse the workflow, then run the formatting pipeline.

     The parser and the post-processing tools (black, autoflake, and an
     import-sorting helper — presumably isort; confirm against the patch
     decorators) are injected as mocks; each must be invoked exactly once
     with the expected arguments on the generated DAG file.
     """
     # Given
     workflow = Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="AAA", to_task_id="BBB")},
         nodes=dict(AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))),
         dependencies={"import AAAA"},
     )
     parse_workflow_mock.return_value = workflow
     # When
     self.converter.convert()
     # Then
     parse_workflow_mock.assert_called_once_with()
     # black's exact mode/write-back flags are implementation details; only the
     # target path matters here.
     black_mock.format_file_in_place.assert_called_once_with(
         Path("/tmp/test_dag.py"), fast=mock.ANY, mode=mock.ANY, write_back=mock.ANY
     )
     autoflake_fix_file_mock.assert_called_once_with(
         "/tmp/test_dag.py",
         args=AutoflakeArgs(
             remove_all_unused_imports=True,
             ignore_init_module_imports=False,
             remove_duplicate_keys=False,
             remove_unused_variables=True,
             in_place=True,
             imports=None,
             expand_star_imports=False,
             check=False,
         ),
         standard_out=sys.stdout,
     )
     sort_imports_mock.assert_called_once_with("/tmp/test_dag.py")
    def test_write_dag_file(self, render_template_mock):
        """render_workflow() must pass the workflow's data to the template renderer."""
        node = ParsedActionNode(DummyMapper(Element("dummy"), name="TASK_1"))
        workflow = Workflow(
            input_directory_path="/tmp/input_directory",
            output_directory_path="/tmp/input_directory",
            dag_name="test_dag",
            relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
            nodes=dict(TASK_1=node),
            dependencies={"import awesome_stuff"},
        )

        content = self.converter.render_workflow(workflow=workflow)

        render_template_mock.assert_called_once_with(
            dag_name="test_dag",
            dependencies={"import awesome_stuff"},
            nodes=[node],
            params={"user.name": "USER"},
            relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
            schedule_interval=None,
            start_days_ago=None,
            template_name="workflow.tpl",
        )
        self.assertEqual("TEXT_CONTENT", content)
class SubWorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Render subworkflow.tpl and check that the output is valid Python."""

    TEMPLATE_NAME = "subworkflow.tpl"

    # Minimal parameter set the template needs: one node carrying two tasks
    # and the relation between them, plus workflow-level job properties,
    # config and relations.
    DEFAULT_TEMPLATE_PARAMS = dict(
        dependencies={"import awesome_stuff"},
        nodes=[
            ParsedActionNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task",
                             to_task_id="second_task")
                ],
            )
        ],
        job_properties={"user.name": "USER"},
        config={"key": "value"},
        relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
    )

    def test_green_path(self):
        """The rendered template must parse as valid Python code."""
        res = render_template(self.TEMPLATE_NAME,
                              **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(res)
Beispiel #12
0
    def parse_join_node(self, join_node):
        """
        Parse a join node.

        A join waits for every path of its matching fork node to finish. As
        the parser we assume the Oozie workflow follows the schema perfectly,
        so the join becomes a dummy node with one downstream transition.
        """
        join_name = join_node.attrib["name"]
        mapper = DummyMapper(
            oozie_node=join_node,
            name=join_name,
            dag_name=self.workflow.dag_name,
        )

        node = ParsedActionNode(mapper)
        node.add_downstream_node_name(join_node.attrib["to"])

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Join Node.")
        self.workflow.nodes[join_name] = node
Beispiel #13
0
    def parse_join_node(self, join_node):
        """
        Parse a join node.

        A join waits for every path of its matching fork node to finish. As
        the parser we assume the Oozie workflow follows the schema perfectly.
        The concrete mapper class comes from the control-node mapping.
        """
        join_name = join_node.attrib["name"]
        mapper = self.control_map["join"](oozie_node=join_node, name=join_name)

        node = ParsedActionNode(mapper)
        node.add_downstream_node_name(join_node.attrib["to"])

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Join Node.")
        self.workflow.nodes[join_name] = node
        self.workflow.dependencies.update(mapper.required_imports())
    def test_on_parse_finish(self):
        """The start mapper's on_parse_finish removes the upstream node and relation."""
        workflow = Workflow(
            input_directory_path=None, output_directory_path=None, dag_name=None
        )

        mapper = self._get_start_mapper(name="first_task")

        workflow.nodes["first_task"] = ParsedActionNode(mock.Mock(autospec=BaseMapper))
        workflow.nodes["second_task"] = ParsedActionNode(mapper)
        workflow.relations = {
            Relation(from_task_id="first_task", to_task_id="second_task")
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual({"second_task"}, set(workflow.nodes.keys()))
        self.assertEqual(set(), workflow.relations)
Beispiel #15
0
    def test_should_remove_start_node(self):
        """RemoveStartTransformer drops the StartMapper node and keeps the rest."""
        transformer = RemoveStartTransformer()
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        kept_mapper = mock.Mock(spec=BaseMapper)
        kept_mapper.name = "first_task"
        start_mapper = mock.Mock(spec=StartMapper)
        start_mapper.name = "start_task"

        for current in (kept_mapper, start_mapper):
            workflow.nodes[current.name] = ParsedActionNode(
                mapper=current, tasks=[self._get_dummy_task(current.name)]
            )

        transformer.process_workflow(workflow)

        self.assertEqual(set(workflow.nodes.keys()), {kept_mapper.name})
Beispiel #16
0
    def parse_fork_node(self, root, fork_node):
        """
        Fork nodes need to be dummy operators with multiple parallel downstream
        tasks.

        This parses the fork node, the action nodes that it references and then
        the join node at the end.

        This will only parse well-formed xml-adhering workflows where all paths
        end at the join node.

        :param root: the workflow's root XML element, used to look up the
            nodes each fork path starts at.
        :param fork_node: the ``<fork>`` XML element being parsed.
        """
        fork_name = fork_node.attrib["name"]
        mapper = DummyMapper(oozie_node=fork_node,
                             name=fork_name,
                             dag_name=self.workflow.dag_name)
        p_node = ParsedActionNode(mapper)

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Fork Node.")
        # Collect the XML element each <path start="..."> child points at.
        paths = []
        for node in fork_node:
            if "path" in node.tag:
                # Parse all the downstream tasks that can run in parallel.
                curr_name = node.attrib["start"]
                paths.append(xml_utils.find_node_by_name(root, curr_name))

        # Register the fork itself before recursing into its paths.
        self.workflow.nodes[fork_name] = p_node

        for path in paths:
            p_node.add_downstream_node_name(path.attrib["name"])
            logging.info(
                f"Added {mapper.name}'s downstream: {path.attrib['name']}")

            # Theoretically these will all be action nodes, however I don't
            # think that is guaranteed.
            # The end of the execution path has not been reached
            self.parse_node(root, path)
            # If parsing did not register the node, remove it from the tree so
            # it will not be visited again.
            if path.attrib["name"] not in self.workflow.nodes:
                root.remove(path)
    def test_on_parse_finish_decision_should_not_remove_end_node(self):
        """An end node fed by a decision node must survive on_parse_finish."""
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        mapper = self._get_end_mapper("end_task")

        workflow.nodes["first_task"] = ParsedActionNode(
            mock.Mock(spec=DecisionMapper), tasks=[self._get_dummy_task("first_task")]
        )
        workflow.nodes["second_task"] = ParsedActionNode(
            mock.Mock(spec=BaseMapper), tasks=[self._get_dummy_task("second_task")]
        )
        workflow.nodes["end_task"] = ParsedActionNode(
            mapper, tasks=[self._get_dummy_task("end_task")]
        )

        workflow.relations = {
            Relation(from_task_id=source, to_task_id="end_task")
            for source in ("first_task", "second_task")
        }

        mapper.on_parse_finish(workflow)

        self.assertEqual(
            set(workflow.nodes.keys()), {"first_task", "second_task", "end_task"}
        )
        self.assertEqual(
            workflow.relations,
            {Relation(from_task_id="first_task", to_task_id="end_task")},
        )
Beispiel #18
0
    def parse_decision_node(self, decision_node):
        """
        A decision node enables a workflow to make a selection on the execution
        path to follow.

        The behavior of a decision node can be seen as a switch-case statement.

        A decision node consists of a list of predicates-transition pairs plus
        a default transition. Predicates are evaluated in order or appearance
        until one of them evaluates to true and the corresponding transition is
        taken. If none of the predicates evaluates to true the default
        transition is taken.

        example oozie wf decision node:

        <decision name="[NODE-NAME]">
            <switch>
                <case to="[NODE_NAME]">[PREDICATE]</case>
                ...
                <case to="[NODE_NAME]">[PREDICATE]</case>
                <default to="[NODE_NAME]"/>
            </switch>
        </decision>

        :param decision_node: the ``<decision>`` XML element being parsed.
        """
        mapper = DecisionMapper(
            oozie_node=decision_node,
            name=decision_node.attrib["name"],
            dag_name=self.workflow.dag_name,
            props=self.props,
        )

        p_node = ParsedActionNode(mapper)
        # decision_node[0] is the <switch> element (see the example above);
        # each child — every <case> and the <default> — contributes one
        # downstream transition via its "to" attribute.
        for cases in decision_node[0]:
            p_node.add_downstream_node_name(cases.attrib["to"])

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Decision Node.")
        self.workflow.nodes[decision_node.attrib["name"]] = p_node
Beispiel #19
0
 def _create_workflow(nodes=None):
     """
     Build a minimal Workflow fixture.

     :param nodes: optional mapping of node name -> ParsedActionNode.  When
         None, a single dummy node is generated.
     """
     if nodes is None:
         # FIX: test for None explicitly — the previous `if not nodes` check
         # silently replaced an explicitly-passed empty dict with the default.
         nodes = dict(
             AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="DAG_NAME_A", dag_name="DAG_NAME_B"))
         )
     return Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="DAG_NAME_A", to_task_id="DAG_NAME_B")},
         nodes=nodes,
         dependencies={"import IMPORT"},
     )
Beispiel #20
0
    def test_should_remove_node_in_error_flow(self):
        """A kill node referenced only through the error flow is removed."""
        transformer = RemoveKillTransformer()
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        base_mapper = mock.Mock(spec=BaseMapper)
        base_mapper.name = "first_task"
        kill_mapper = mock.Mock(spec=KillMapper)
        kill_mapper.name = "second_task"

        base_node = ParsedActionNode(base_mapper)
        kill_node = ParsedActionNode(kill_mapper)
        # The kill node is reachable only via the error transition.
        base_node.error_xml = kill_mapper.name

        workflow.nodes[base_mapper.name] = base_node
        workflow.nodes[kill_mapper.name] = kill_node

        transformer.process_workflow(workflow)

        self.assertEqual(set(workflow.nodes.keys()), {base_mapper.name})
 def test_create_dag_file(self, open_mock, _):
     """create_dag_file() must open the expected output path for writing.

     `open` is patched in by a decorator (open_mock); the second injected
     mock is unused here.
     """
     # Given
     workflow = Workflow(
         dag_name="A",
         input_directory_path="in_dir",
         output_directory_path="out_dir",
         relations={Relation(from_task_id="AAA", to_task_id="BBB")},
         nodes=dict(AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))),
         dependencies={"import AAAA"},
     )
     # When
     self.converter.create_dag_file(workflow)
     # Then
     open_mock.assert_called_once_with("/tmp/test_dag.py", "w")
Beispiel #22
0
    def parse_start_node(self, start_node):
        """
        Parse the single start node of a workflow.

        The start node is the workflow job's entry point: once the job is
        started it transitions straight to the node named in the ``to``
        attribute. A workflow definition must have one start node. The
        concrete mapper class comes from the control-node mapping.
        """
        map_class = self.control_map["start"]
        # A short random suffix keeps the generated name unique; a clash is
        # theoretically possible but very unlikely.
        name = "start_node_" + str(uuid.uuid4())[:4]
        mapper = map_class(oozie_node=start_node, name=name)

        node = ParsedActionNode(mapper)
        node.add_downstream_node_name(start_node.attrib["to"])

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Start Node.")
        self.workflow.nodes[name] = node
        self.workflow.dependencies.update(mapper.required_imports())
Beispiel #23
0
    def parse_end_node(self, end_node):
        """
        Parse an end node.

        Reaching the end node means the workflow completed successfully, so
        it is mapped to a node that always completes. The concrete mapper
        class comes from the control-node mapping.
        """
        end_name = end_node.attrib["name"]
        mapper = self.control_map["end"](oozie_node=end_node, name=end_name)

        node = ParsedActionNode(mapper)
        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as End Node.")
        self.workflow.nodes[end_name] = node
        self.workflow.dependencies.update(mapper.required_imports())
Beispiel #24
0
    def test_should_remove_end_node(self):
        """RemoveEndTransformer drops the end node and the edge pointing at it."""
        transformer = RemoveEndTransformer()
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        upstream_mapper = mock.Mock(spec=BaseMapper)
        upstream_mapper.name = "first_task"
        end_mapper = mock.Mock(spec=EndMapper)
        end_mapper.name = "second_task"

        upstream_node = ParsedActionNode(mapper=upstream_mapper)
        end_node = ParsedActionNode(mapper=end_mapper)
        upstream_node.downstream_names = [end_node.name]

        workflow.nodes[upstream_mapper.name] = upstream_node
        workflow.nodes[end_mapper.name] = end_node

        transformer.process_workflow(workflow)

        self.assertEqual(set(workflow.nodes.keys()), {upstream_mapper.name})
        self.assertEqual(upstream_node.downstream_names, [])
Beispiel #25
0
    def parse_end_node(self, end_node):
        """
        Parse an end node.

        Reaching the end node means the workflow completed successfully, so
        it is mapped to a dummy node that always completes.
        """
        end_name = end_node.attrib["name"]
        mapper = EndMapper(
            oozie_node=end_node, name=end_name, dag_name=self.workflow.dag_name
        )

        node = ParsedActionNode(mapper)
        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as End Node.")
        self.workflow.nodes[end_name] = node
Beispiel #26
0
    def test_should_not_remove_end_node_when_connected_with_decision(self):
        """An end node directly downstream of a decision node must be kept."""
        transformer = RemoveEndTransformer()
        workflow = Workflow(
            input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B"
        )

        decision_mapper = mock.Mock(spec=DecisionMapper)
        decision_mapper.name = "first_task"
        plain_mapper = mock.Mock(spec=BaseMapper)
        plain_mapper.name = "second_task"
        end_mapper = mock.Mock(spec=EndMapper)
        end_mapper.name = "end_task"

        def build_node(current_mapper):
            # One dummy task per node, named after its mapper.
            return ParsedActionNode(
                mapper=current_mapper,
                tasks=[self._get_dummy_task(current_mapper.name)],
            )

        decision_node = build_node(decision_mapper)
        plain_node = build_node(plain_mapper)
        end_node = build_node(end_mapper)
        decision_node.downstream_names = [plain_mapper.name, end_mapper.name]

        workflow.nodes[decision_mapper.name] = decision_node
        workflow.nodes[plain_mapper.name] = plain_node
        workflow.nodes[end_mapper.name] = end_node

        transformer.process_workflow(workflow)

        self.assertEqual(
            set(workflow.nodes.keys()),
            {decision_mapper.name, plain_mapper.name, end_mapper.name},
        )
        self.assertEqual(
            decision_node.downstream_names, [plain_mapper.name, end_mapper.name]
        )
        self.assertEqual(plain_node.downstream_names, [])
        self.assertEqual(end_node.downstream_names, [])
Beispiel #27
0
    def parse_kill_node(self, kill_node: ET.Element):
        """
        Parse a kill node.

        Reaching a kill node means the workflow finished in an error; a
        workflow definition may contain zero or more of them. The concrete
        mapper class comes from the control-node mapping.
        """
        kill_name = kill_node.attrib["name"]
        mapper = self.control_map["kill"](
            oozie_node=kill_node,
            name=kill_name,
            trigger_rule=TriggerRule.ONE_FAILED,
        )

        node = ParsedActionNode(mapper)
        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Kill Node.")
        self.workflow.nodes[kill_name] = node
        self.workflow.dependencies.update(mapper.required_imports())
Beispiel #28
0
    def parse_kill_node(self, kill_node: ET.Element):
        """
        Parse a kill node.

        Reaching a kill node means the workflow finished in an error; a
        workflow definition may contain zero or more of them.
        """
        kill_name = kill_node.attrib["name"]
        mapper = KillMapper(
            oozie_node=kill_node,
            name=kill_name,
            dag_name=self.workflow.dag_name,
            trigger_rule=TriggerRule.ONE_FAILED,
            props=self.props,
        )

        node = ParsedActionNode(mapper)
        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Kill Node.")
        self.workflow.nodes[kill_name] = node
Beispiel #29
0
    def parse_action_node(self, action_node: ET.Element):
        """
        Action nodes are the mechanism by which a workflow triggers the
        execution of a computation/processing task.

        Action nodes are required to have an action-choice (map-reduce, etc.),
        ok, and error node in the xml.

        :param action_node: the ``<action>`` XML element being parsed.
        :raises Exception: if the required ``<ok>`` or ``<error>`` child is
            missing.
        """
        # The 0th element of the node is the actual action tag.
        # In the form of 'action'
        action_operation_node = action_node[0]
        action_name = action_operation_node.tag

        # Unrecognised action types fall back to the "unknown" mapper.
        if action_name not in self.action_map:
            action_name = "unknown"

        map_class = self.action_map[action_name]
        mapper = map_class(
            oozie_node=action_operation_node,
            name=action_node.attrib["name"],
            params=self.params,
            dag_name=self.workflow.dag_name,
            action_mapper=self.action_map,
            control_mapper=self.control_map,
            input_directory_path=self.workflow.input_directory_path,
            output_directory_path=self.workflow.output_directory_path,
        )

        p_node = ParsedActionNode(mapper)
        # The <ok to="..."> child gives the success transition; it is mandatory.
        ok_node = action_node.find("ok")
        if ok_node is None:
            raise Exception("Missing ok node in {}".format(action_node))
        p_node.add_downstream_node_name(ok_node.attrib["to"])
        # The <error to="..."> child gives the failure transition; also mandatory.
        error_node = action_node.find("error")
        if error_node is None:
            raise Exception("Missing error node in {}".format(action_node))
        p_node.set_error_node_name(error_node.attrib["to"])

        mapper.on_parse_node()

        logging.info(
            f"Parsed {mapper.name} as Action Node of type {action_name}.")
        self.workflow.dependencies.update(mapper.required_imports())

        self.workflow.nodes[mapper.name] = p_node
class WorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Render workflow.tpl and check that the output is valid Python."""

    TEMPLATE_NAME = "workflow.tpl"

    # Minimal parameter set the template needs: one node with two tasks plus
    # DAG-level name, dependencies, params, relations and scheduling fields.
    DEFAULT_TEMPLATE_PARAMS = dict(
        dag_name="test_dag",
        dependencies={"import awesome_stuff"},
        nodes=[
            ParsedActionNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
            )
        ],
        params={"user.name": "USER"},
        relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
        schedule_interval=None,
        start_days_ago=None,
    )

    def test_green_path(self):
        """The rendered template must parse as valid Python code."""
        res = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(res)