예제 #1
0
class SubWorkflowTemplateTestCase(BaseTestCases.BaseTemplateTestCase):
    """Renders subworkflow.tpl with default parameters and checks the result is valid Python."""

    TEMPLATE_NAME = "subworkflow.tpl"

    DEFAULT_TEMPLATE_PARAMS = {
        "dependencies": {"import awesome_stuff"},
        "task_groups": [
            TaskGroup(
                name="AAA",
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task", to_task_id="second_task")
                ],
            )
        ],
        "job_properties": {"user.name": "USER"},
        "config": {"key": "value"},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
    }

    def test_green_path(self):
        # The rendered template must be syntactically valid Python source.
        rendered = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
예제 #2
0
    def add_state_handler_if_needed(self):
        """
        Add additional tasks and relations to handle error and ok flow.

        If the error path is specified, an error-handler task is appended
        together with error relations from every task in the group.
        If the ok path is also specified, an ok-handler task is appended
        together with a relation from the last task in the group.
        If the error path is not specified, nothing is done.
        """
        if not self.error_downstream_name:
            return

        # Error flow: the handler fires when any of the tasks fails.
        error_task_id = self.name + "_error"
        self.error_handler_task = Task(
            task_id=error_task_id,
            template_name="dummy.tpl",
            trigger_rule=TriggerRule.ONE_FAILED,
        )
        self.relations.extend(
            Relation(from_task_id=task.task_id, to_task_id=error_task_id, is_error=True)
            for task in self.tasks
        )

        if not self.downstream_names:
            return

        # Ok flow: the handler fires when the final task succeeds.
        ok_task_id = self.name + "_ok"
        self.ok_handler_task = Task(
            task_id=ok_task_id,
            template_name="dummy.tpl",
            trigger_rule=TriggerRule.ONE_SUCCESS,
        )
        self.relations.append(
            Relation(from_task_id=self.tasks[-1].task_id, to_task_id=ok_task_id)
        )
예제 #3
0
 def to_tasks_and_relations(self):
     """Map the action to a prepare task, the pig task, and the relation joining them."""
     prepare_task_id = self.name + "_prepare"
     prepare_command = self.get_prepare_command(self.oozie_node, self.params)
     prepare_task = Task(
         task_id=prepare_task_id,
         template_name="prepare.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(prepare_command=prepare_command),
     )
     pig_task = Task(
         task_id=self.name,
         template_name="pig.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             properties=self.properties,
             params_dict=self.params_dict,
             script_file_name=self.script_file_name,
         ),
     )
     # The prepare task must complete before the pig task starts.
     relations = [Relation(from_task_id=prepare_task_id, to_task_id=self.name)]
     return [prepare_task, pig_task], relations
예제 #4
0
 def to_tasks_and_relations(self):
     """Build the mapreduce task, prepending a prepare task when the node has a prepare element."""
     action_task = Task(
         task_id=self.name,
         template_name="mapreduce.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             properties=self.properties,
             params_dict=self.params_dict,
             hdfs_files=self.hdfs_files,
             hdfs_archives=self.hdfs_archives,
         ),
     )
     if not self.has_prepare(self.oozie_node):
         return [action_task], []

     # A prepare element is present: run the prepare step before the action.
     prepare_task = Task(
         task_id=self.name + "_prepare",
         template_name="prepare.tpl",
         trigger_rule=self.trigger_rule,
         template_params=dict(
             prepare_command=self.get_prepare_command(self.oozie_node, self.params)
         ),
     )
     return (
         [prepare_task, action_task],
         [Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)],
     )
    def test_to_tasks_and_relations(self):
        """Shell mapper should emit a prepare task, the shell task, and one connecting relation."""
        params = {
            "dataproc_cluster": "my-cluster",
            "gcp_region": "europe-west3",
            "nameNode": "hdfs://localhost:9020/",
        }
        mapper = self._get_shell_mapper(params=params)
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "prepare_command":
                        "$DAGS_FOLDER/../data/prepare.sh -c my-cluster -r europe-west3 "
                        '-d "//examples/output-data/demo/pig-node //examples/output-data'
                        '/demo/pig-node2" -m "//examples/input-data/demo/pig-node '
                        '//examples/input-data/demo/pig-node2"'
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="shell.tpl",
                    template_params={"pig_command": "sh 'echo arg1 arg2'"},
                ),
            ],
        )
        # The prepare task must be ordered before the shell task.
        self.assertEqual(
            relations,
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")])
예제 #6
0
class SubWorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Renders subworkflow.tpl with default parameters and checks the result is valid Python."""

    TEMPLATE_NAME = "subworkflow.tpl"

    DEFAULT_TEMPLATE_PARAMS = {
        "dependencies": {"import awesome_stuff"},
        "nodes": [
            ParsedActionNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task", to_task_id="second_task")
                ],
            )
        ],
        "job_properties": {"user.name": "USER"},
        "config": {"key": "value"},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
    }

    def test_green_path(self):
        # The rendered template must be syntactically valid Python source.
        rendered = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
class WorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Renders workflow.tpl with default parameters and checks the result is valid Python."""

    TEMPLATE_NAME = "workflow.tpl"

    DEFAULT_TEMPLATE_PARAMS = {
        "dag_name": "test_dag",
        "dependencies": {"import awesome_stuff"},
        "task_groups": [
            TaskGroup(
                name="TASK_GROUP",
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
            )
        ],
        "job_properties": {"user.name": "USER"},
        "config": {},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
        "schedule_interval": 3,
        "start_days_ago": 3,
        "task_map": {"oozie-task": ["airflow-task"]},
    }

    def test_green_path(self):
        # The rendered template must be syntactically valid Python source.
        rendered = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
예제 #8
0
    def test_task_and_relations(self):
        """Distcp mapper should emit a prepare task, the distcp task, and one connecting relation."""
        # Given
        mapper = _get_distcp_mapper(self.distcp_node,
                                    job_properties=EXAMPLE_JOB_PROPERTIES,
                                    config=EXAMPLE_CONFIG_PROPERTIES)

        # When
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        # Then
        self.assertEqual(mapper.oozie_node, self.distcp_node)
        self.assertIsNotNone(tasks)
        self.assertIsNotNone(relations)
        self.assertEqual(2, len(tasks))
        self.assertEqual(1, len(relations))
        self.assertEqual(
            [
                Task(
                    task_id="distcp_prepare",
                    template_name="prepare.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "delete": "/tmp/d_path",
                        "mkdir": None
                    },
                ),
                Task(
                    task_id="distcp",
                    template_name="distcp.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={
                                "nameNode1": "hdfs://localhost:8081",
                                "nameNode2": "hdfs://localhost:8082",
                            },
                            action_node_properties={
                                "oozie.launcher.mapreduce.job.hdfs-servers":
                                "{{nameNode1}} ,{{nameNode2}}"
                            },
                        ),
                        "distcp_command":
                        "--class=org.apache.hadoop.tools.DistCp -- -update -skipcrccheck "
                        "-strategy dynamic '{{nameNode1}}/path/to/input file.txt' "
                        "'{{nameNode2}}/path/to/output-file.txt'",
                    },
                ),
            ],
            tasks,
        )
        # The prepare task must be ordered before the distcp task.
        self.assertEqual([
            Relation(from_task_id=f"{mapper.name}_prepare",
                     to_task_id=mapper.name)
        ], relations)
    def test_convert_nodes(self):
        """convert_nodes should attach each mapper's tasks and relations to its node."""
        tasks_1 = [
            Task(task_id="first_task", template_name="dummy.tpl"),
            Task(task_id="second_task", template_name="dummy.tpl"),
        ]
        relations_1 = {
            Relation(from_task_id="first_task", to_task_id="tasks_2")
        }
        tasks_2 = [Task(task_id="third_task", template_name="dummy.tpl")]
        # Use an empty set — ``{}`` is an empty *dict*, which made this fixture
        # a different collection type from relations_1 above.
        relations_2 = set()

        mapper_1 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_1, relations_1)})
        mapper_2 = mock.MagicMock(
            **{"to_tasks_and_relations.return_value": (tasks_2, relations_2)})

        node_1 = ParsedNode(mapper=mapper_1)
        node_2 = ParsedNode(mapper=mapper_2)
        nodes = dict(TASK_1=node_1, TASK_2=node_2)

        self.converter.convert_nodes(nodes=nodes)
        # Identity (not equality) checks: the converter must attach the exact
        # objects returned by the mappers, not copies.
        self.assertIs(node_1.tasks, tasks_1)
        self.assertIs(node_2.tasks, tasks_2)
        self.assertIs(node_1.relations, relations_1)
        self.assertIs(node_2.relations, relations_2)
    def test_to_tasks_and_relations(self):
        """Shell mapper should emit a prepare task, the shell task, and one connecting relation."""
        job_properties = {"nameNode": "hdfs://localhost:9020/", "queueName": "default"}
        config = {"dataproc_cluster": "my-cluster", "gcp_region": "europe-west3"}
        mapper = self._get_shell_mapper(job_properties=job_properties, config=config)
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "delete": "//examples/output-data/demo/pig-node "
                        "//examples/output-data/demo/pig-node2",
                        "mkdir": "//examples/input-data/demo/pig-node "
                        "//examples/input-data/demo/pig-node2",
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="shell.tpl",
                    template_params={
                        "pig_command": "sh echo arg1 arg2",
                        "action_node_properties": {
                            "mapred.job.queue.name": "default",
                            "mapred.map.output.compress": "false",
                        },
                    },
                ),
            ],
            tasks,
        )
        # The prepare task must be ordered before the shell task.
        self.assertEqual(relations, [Relation(from_task_id="test_id_prepare", to_task_id="test_id")])
class SubWorkflowTemplateTestCase(TestCase, TemplateTestMixin):
    """Renders subworkflow.tpl with default parameters and checks the result is valid Python."""

    TEMPLATE_NAME = "subworkflow.tpl"

    DEFAULT_TEMPLATE_PARAMS = {
        "dependencies": ["import awesome_stuff"],
        "nodes": [
            ParsedNode(
                mock.MagicMock(spec=DummyMapper),
                tasks=[
                    Task(task_id="first_task", template_name="dummy.tpl"),
                    Task(task_id="second_task", template_name="dummy.tpl"),
                ],
                relations=[
                    Relation(from_task_id="first_task", to_task_id="second_task")
                ],
            )
        ],
        "params": {"user.name": "USER"},
        "relations": {Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
        "schedule_interval": None,
        "start_days_ago": None,
    }

    def test_green_path(self):
        # The rendered template must be syntactically valid Python source.
        rendered = render_template(self.TEMPLATE_NAME, **self.DEFAULT_TEMPLATE_PARAMS)
        self.assertValidPython(rendered)
예제 #12
0
    def _get_tasks(self):
        """
        Return the list of Airflow tasks that are the result of mapping.

        :return: list of Airflow tasks
        """
        spark_task = Task(
            task_id=self.name,
            template_name="spark.tpl",
            trigger_rule=self.trigger_rule,
            template_params=dict(
                main_jar=self.java_jar,
                main_class=self.java_class,
                arguments=self.application_args,
                archives=self.hdfs_archives,
                files=self.hdfs_files,
                job_name=self.job_name,
                dataproc_spark_properties=self.properties,
                dataproc_spark_jars=self.dataproc_jars,
            ),
        )

        if self.has_prepare(self.oozie_node):
            # A prepare element is present: the prepare step runs first.
            prepare_task = Task(
                task_id=self.name + "_prepare",
                template_name="prepare.tpl",
                template_params=dict(prepare_command=self.prepare_command),
            )
            return [prepare_task, spark_task]

        return [spark_task]
예제 #13
0
    def test_should_add_end_success_workflow_node(self):
        """Transformer should append an end-success notification task group to the workflow."""
        # Given
        transformer = AddWorkflowNotificationTransformer()
        workflow = Workflow(
            input_directory_path="",
            output_directory_path="",
            dag_name="DAG_NAME_B",
        )
        props = PropertySet(
            job_properties={PROP_WORKFLOW_NOTIFICATION_URL: "http://example.com/workflow"}
        )
        first_task_group = TaskGroup(
            name="first_task",
            tasks=[Task(task_id="first_task", template_name="dummy.tpl")],
        )

        # When
        workflow.task_groups[first_task_group.name] = first_task_group

        # Then
        transformer.process_workflow_after_convert_nodes(workflow, props)
        self.assertIn(END_SUCCESS_TASK_GROUP_NAME, workflow.task_groups.keys())
        self.assertIn(END_SUCCESS_TASK_GROUP_NAME, first_task_group.downstream_names)
        expected_tasks = [
            Task(
                task_id=END_SUCCESS_TASK_GROUP_NAME,
                template_name="http.tpl",
                trigger_rule="one_success",
                template_params={"url": "http://example.com/workflow"},
            )
        ]
        self.assertEqual(
            expected_tasks,
            workflow.task_groups[END_SUCCESS_TASK_GROUP_NAME].tasks,
        )
예제 #14
0
 def to_tasks_and_relations(self):
     """Build the git task, prepending a prepare task when the node has a prepare element."""
     git_task = Task(
         task_id=self.name,
         template_name="git.tpl",
         template_params=dict(bash_command=self.bash_command),
     )
     if not self.has_prepare(self.oozie_node):
         return [git_task], []

     # A prepare element is present: run the prepare step before the git task.
     prepare_command = self.get_prepare_command(self.oozie_node, self.params)
     prepare_task = Task(
         task_id=self.name + "_prepare",
         template_name="prepare.tpl",
         template_params=dict(prepare_command=prepare_command),
     )
     relations = [
         Relation(from_task_id=self.name + "_prepare", to_task_id=self.name)
     ]
     return [prepare_task, git_task], relations
    def test_to_tasks_and_relations(self):
        """Pig mapper should emit a prepare task, the pig task, and one connecting relation."""
        job_properties = {"nameNode": "hdfs://"}
        config = {
            "dataproc_cluster": "my-cluster",
            "gcp_region": "europe-west3"
        }
        mapper = self._get_pig_mapper(job_properties=job_properties,
                                      config=config)
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "delete":
                        "/examples/output-data/demo/pig-node /examples/output-data/demo/pig-node2",
                        "mkdir":
                        "/examples/input-data/demo/pig-node /examples/input-data/demo/pig-node2",
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="pig.tpl",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={"nameNode": "hdfs://"},
                            action_node_properties={
                                "mapred.job.queue.name": "${queueName}",
                                "mapred.map.output.compress": "false",
                            },
                        ),
                        "params_dict": {
                            "INPUT":
                            "/user/${wf:user()}/${examplesRoot}/input-data/text",
                            "OUTPUT":
                            "/user/${wf:user()}/${examplesRoot}/output-data/demo/pig-node",
                        },
                        "script_file_name":
                        "id.pig",
                        "action_node_properties": {
                            "mapred.job.queue.name": "${queueName}",
                            "mapred.map.output.compress": "false",
                        },
                    },
                ),
            ],
        )
        # The prepare task must be ordered before the pig task.
        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
    def test_should_handle_join_type_case(self):
        """
        Input:

        CONTROL >      < ACTION
                |     |
                CONTROL


        Expected output:

                         STATUS
                         |
        CONTROL >      < ACTION
                |     |
        TRANSITION    TRANSITION
                |     |
                CONTROL
        """
        # Given
        control_task_group = ControlTaskGroup(
            name="control_task_group",
            tasks=[Task(task_id="control_task", template_name="dummy.tpl")])
        join_task_group = ControlTaskGroup(
            name="join_task_group",
            tasks=[Task(task_id="join_task", template_name="dummy.tpl")])
        self.workflow.task_groups[control_task_group.name] = control_task_group
        self.workflow.task_groups[join_task_group.name] = join_task_group
        # Both the action group and the control group converge on the join group.
        self.action_task_group.downstream_names = [join_task_group.name]
        control_task_group.downstream_names = [join_task_group.name]

        # Names the transformer is expected to generate for the notification groups.
        exp_action_status_notification_name = f"{self.action_task_group.name}{NODE_STATUS_SUFFIX}"
        exp_action_control_transition_notification_name = (
            f"{self.action_task_group.name}{NODE_TRANSITION_SUFFIX}_T_{join_task_group.name}"
        )
        exp_control_control_transition_notification_name = (
            f"{control_task_group.name}{NODE_TRANSITION_SUFFIX}_T_{join_task_group.name}"
        )

        # When
        self.transformer.process_workflow_after_convert_nodes(
            self.workflow, self.props)

        # Then
        # 3 pre-existing groups (control, action, join) plus 1 status and
        # 2 transition notification groups.
        self.assertEqual(6, len(self.workflow.task_groups))
        self.assertEqual(
            {
                exp_action_status_notification_name,
                control_task_group.name,
                self.action_task_group.name,
                exp_action_control_transition_notification_name,
                exp_control_control_transition_notification_name,
                join_task_group.name,
            },
            self.workflow.task_groups.keys(),
        )
예제 #17
0
    def test_convert_to_text_with_prepare_node(self):
        """Git mapper should emit prepare + git tasks when the XML contains a prepare node."""
        git_node = ET.fromstring(EXAMPLE_XML)

        mapper = self._get_git_mapper(git_node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "delete": "/tmp/d_path",
                        "mkdir": "/tmp/mk_path"
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="git.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "git_uri":
                        "https://github.com/apache/oozie",
                        "git_branch":
                        "{{branch}}",
                        "destination_path":
                        "/my_git_repo_directory",
                        "key_path":
                        "/awesome-key/",
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={
                                "branch": "my-awesome-branch",
                                "nameNode": "hdfs://",
                                "userName": "******",
                                "examplesRoot": "examples",
                            },
                            action_node_properties={},
                        ),
                    },
                ),
            ],
            tasks,
        )

        # The prepare task must be ordered before the git task.
        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
예제 #18
0
    def test_prepend_task_empty_relations(self):
        """prepend_task should create the connecting relation even when none are passed in."""
        first = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")
        second = Task(task_id=TEST_MAPPER_NAME + "_2", template_name="pig.tpl")

        tasks, relations = ActionMapper.prepend_task(
            task_to_prepend=first, tasks=[second], relations=[]
        )

        self.assertEqual([first, second], tasks)
        expected_relations = [
            Relation(from_task_id="mapper_name_1", to_task_id="mapper_name_2")
        ]
        self.assertEqual(expected_relations, relations)
예제 #19
0
    def test_to_tasks_and_relations_with_prepare_node(self):
        """Spark mapper should emit prepare + spark tasks when the XML contains a prepare node."""
        spark_node = ET.fromstring(EXAMPLE_XML_WITH_PREPARE)
        mapper = self._get_spark_mapper(spark_node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "prepare_command":
                        "$DAGS_FOLDER/../data/prepare.sh -c my-cluster -r europe-west3 "
                        '-d "/tmp/d_path" -m "/tmp/mk_path"'
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="spark.tpl",
                    template_params={
                        "main_jar":
                        None,
                        "main_class":
                        "org.apache.spark.examples.mllib.JavaALS",
                        "arguments":
                        ["inputpath=hdfs:///input/file.txt", "value=2"],
                        "archives": [],
                        "files": [],
                        "job_name":
                        "Spark Examples",
                        "dataproc_spark_properties": {
                            "mapred.compress.map.output":
                            "true",
                            "spark.executor.extraJavaOptions":
                            "-XX:+HeapDumpOnOutOfMemoryError "
                            "-XX:HeapDumpPath=/tmp",
                        },
                        "dataproc_spark_jars":
                        ["/lib/spark-examples_2.10-1.1.0.jar"],
                    },
                ),
            ],
        )

        # The prepare task must be ordered before the spark task.
        self.assertEqual(
            relations,
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")])
예제 #20
0
 def test_multiple(self):
     """chain should link each task to its successor, in order."""
     task_ids = ["task_1", "task_2", "task_3", "task_4"]
     relations = fs_mapper.chain(
         [Task(task_id=task_id, template_name="") for task_id in task_ids]
     )
     expected = [
         Relation(from_task_id=source, to_task_id=target)
         for source, target in zip(task_ids, task_ids[1:])
     ]
     self.assertEqual(expected, relations)
    def test_convert_to_text_without_prepare_node(self):
        """With the prepare node removed, only the git task and no relations are produced."""
        spark_node = ET.fromstring(EXAMPLE_XML)
        # Strip the prepare element so the mapper takes the no-prepare path.
        prepare_node = spark_node.find("prepare")
        spark_node.remove(prepare_node)
        mapper = self._get_git_mapper(spark_node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id",
                    template_name="git.tpl",
                    trigger_rule="dummy",
                    template_params={
                        "bash_command":
                        "$DAGS_FOLDER/../data/git.sh --cluster {dataproc_cluster} "
                        "--region {gcp_region} --git-uri https://github.com/apache/oozie "
                        "--destination-path /my_git_repo_directory --branch my-awesome-branch "
                        "--key-path /awesome-key/"
                    },
                )
            ],
        )
        self.assertEqual(relations, [])
예제 #22
0
    def test_convert_to_text_without_prepare_node(self):
        """With the prepare node removed, only the git task and no relations are produced."""
        spark_node = ET.fromstring(EXAMPLE_XML)
        # Strip the prepare element so the mapper takes the no-prepare path.
        prepare_node = spark_node.find("prepare")
        spark_node.remove(prepare_node)
        mapper = self._get_git_mapper(spark_node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id",
                    template_name="git.tpl",
                    template_params={
                        "git_uri": "https://github.com/apache/oozie",
                        "git_branch": "my-awesome-branch",
                        "destination_path": "/my_git_repo_directory",
                        "key_path": "/awesome-key/",
                        "props": PropertySet(
                            config={"dataproc_cluster": "my-cluster", "gcp_region": "europe-west3"},
                            job_properties={
                                "branch": "my-awesome-branch",
                                "nameNode": "hdfs://",
                                "userName": "******",
                                "examplesRoot": "examples",
                            },
                            action_node_properties={},
                        ),
                    },
                )
            ],
            tasks,
        )
        self.assertEqual([], relations)
    def test_to_tasks_and_relations(self):
        """SSH mapper should emit a single ssh task and no relations."""
        mapper = self._get_ssh_mapper(job_properties={}, config={})

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id",
                    template_name="ssh.tpl",
                    template_params={
                        "props":
                        PropertySet(config={},
                                    job_properties={},
                                    action_node_properties={}),
                        "command":
                        "'ls -l -a'",
                        "user":
                        "******",
                        "host":
                        "apache.org",
                    },
                )
            ],
            tasks,
        )
        self.assertEqual(relations, [])
예제 #24
0
 def test_to_tasks_and_relations(self):
     """Email mapper should emit a single email task and no relations."""
     mapper = self._get_email_mapper(job_properties={"userName": "******"},
                                     config={})
     mapper.on_parse_node()
     tasks, relations = mapper.to_tasks_and_relations()
     self.assertEqual(
         [
             Task(
                 task_id="test_id",
                 template_name="email.tpl",
                 trigger_rule="one_success",
                 template_params={
                     "props":
                     PropertySet(config={},
                                 job_properties={"userName": "******"},
                                 action_node_properties={}),
                     "to_addr":
                     "[email protected],[email protected]",
                     "cc_addr":
                     "[email protected],[email protected]",
                     "bcc_addr":
                     "[email protected],[email protected]",
                     "subject":
                     "Email notifications for {{run_id}}",
                     "body":
                     "Hi {{userName}} , the wf {{run_id}} successfully "
                     "completed. Bye {{userName}}",
                 },
             )
         ],
         tasks,
     )
     self.assertEqual(relations, [])
예제 #25
0
    def test_to_tasks_and_relations_should_parse_prepare_element(self):
        """Hive mapper should turn the prepare fragment into a leading prepare task."""
        self.hive_node.append(ET.fromstring(FRAGMENT_QUERY))
        self.hive_node.append(ET.fromstring(FRAGMENT_PREPARE))

        mapper = self._get_hive_mapper(job_properties=self.job_properties,
                                       config=self.config)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        # Only the prepare task (tasks[0]) is compared in full here; the second
        # task is covered by the length check and the relation assertion below.
        self.assertEqual(2, len(tasks))
        self.assertEqual(
            Task(
                task_id="test_id_prepare",
                template_name="prepare.tpl",
                template_params={
                    "delete":
                    "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/pig/output",
                    "mkdir":
                    "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/pig/created-folder",
                },
            ),
            tasks[0],
        )

        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
예제 #26
0
    def test_with_prepare(self):
        cluster = "my-cluster"
        region = "europe-west3"
        job_properties = {"nameNode": "hdfs://localhost:8020"}
        config = {"dataproc_cluster": cluster, "gcp_region": region}
        # language=XML
        pig_node_prepare_str = """
<pig>
    <name-node>hdfs://</name-node>
    <prepare>
        <delete path="${nameNode}/examples/output-data/demo/pig-node" />
        <delete path="${nameNode}/examples/output-data/demo/pig-node2" />
        <mkdir path="${nameNode}/examples/input-data/demo/pig-node" />
        <mkdir path="${nameNode}/examples/input-data/demo/pig-node2" />
    </prepare>
</pig>
"""
        pig_node_prepare = ET.fromstring(pig_node_prepare_str)
        extension = self.get_mapper_extension(
            pig_node_prepare, props=PropertySet(config=config, job_properties=job_properties)
        )
        self.assertTrue(extension.has_prepare())
        task = extension.get_prepare_task()
        self.assertEqual(
            Task(
                task_id="mapper_prepare",
                template_name="prepare.tpl",
                template_params={
                    "delete": "/examples/output-data/demo/pig-node /examples/output-data/demo/pig-node2",
                    "mkdir": "/examples/input-data/demo/pig-node /examples/input-data/demo/pig-node2",
                },
            ),
            task,
        )
예제 #27
0
 def test_last_task_id_of_ok_flow(self):
     """The ok-flow's last task id is the final task of the group, not the error handler."""
     only_task = Task(task_id="TASK", template_name="dummy.tpl")
     group = TaskGroup(
         name="task1",
         error_downstream_name="AAAA",
         tasks=[only_task],
     )
     self.assertEqual("TASK", group.last_task_id_of_ok_flow)
예제 #28
0
    def test_to_tasks_and_relations_with_prepare_node(self):
        """A spark action with <prepare> produces a prepare task followed by the spark task."""
        mapper = self._get_spark_mapper(ET.fromstring(EXAMPLE_XML_WITH_PREPARE))
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        expected_prepare_task = Task(
            task_id="test_id_prepare",
            template_name="prepare.tpl",
            template_params={"delete": "/tmp/d_path", "mkdir": "/tmp/mk_path"},
        )
        expected_spark_task = Task(
            task_id="test_id",
            template_name="spark.tpl",
            template_params={
                "main_jar": None,
                "main_class": "org.apache.spark.examples.mllib.JavaALS",
                "arguments": ["inputpath=hdfs:///input/file.txt", "value=2"],
                "hdfs_archives": [],
                "hdfs_files": [],
                "job_name": "Spark Examples",
                "spark_opts": {
                    "spark.executor.extraJavaOptions":
                    "-XX:+HeapDumpOnOutOfMemoryError "
                    "-XX:HeapDumpPath=/tmp"
                },
                "dataproc_spark_jars":
                ["/lib/spark-examples_2.10-1.1.0.jar"],
            },
        )
        self.assertEqual([expected_prepare_task, expected_spark_task], tasks)
        # The prepare task must run before the spark task.
        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations,
        )
예제 #29
0
 def to_tasks_and_relations(self) -> Tuple[List[Task], List[Relation]]:
     """Return a single hard-coded dummy task and an empty relation list (test stub)."""
     dummy_task = Task(
         task_id="TEST_TASK",
         template_name="dummy.tpl",
         template_params={},
     )
     return [dummy_task], []
    def test_convert_tasks_and_relations(self):
        """The dummy mapper converts to exactly one dummy task and no relations."""
        dummy_mapper = self._get_dummy_mapper()

        converted_tasks, converted_relations = dummy_mapper.to_tasks_and_relations()

        expected_tasks = [Task(task_id="test_id", template_name="dummy.tpl")]
        self.assertEqual(expected_tasks, converted_tasks)
        self.assertEqual([], converted_relations)