def test_extract_properties_from_job_xml_nodes_minimal_green_path(self, parse_mock):
        """Green path: properties from a single <job-xml> file are extracted.

        XML parsing is stubbed via ``parse_mock``, so no real file is read.
        The test verifies the resolved file path (input dir + "hdfs/" + the
        file name from the <job-xml> node) and the extracted name/value dict.
        """
        # language=XML
        action = ET.ElementTree(
            ET.fromstring(
                """
    <action>
        <job-xml>aaa.xml</job-xml>
    </action>
"""
            )
        )
        # Stub the parser to return a two-property <configuration> tree.
        # language=XML
        parse_mock.return_value = ET.ElementTree(
            ET.fromstring(
                """
    <configuration>
        <property>
            <name>KEY1</name>
            <value>VALUE1</value>
        </property>
        <property>
            <name>KEY2</name>
            <value>VALUE2</value>
        </property>
    </configuration>
"""
            )
        )
        job_xml_nodes = find_nodes_by_tag(action, TAG_JOB_XML)
        result = extract_properties_from_job_xml_nodes(
            job_xml_nodes, input_directory_path="/tmp/no-error-path", props=PropertySet()
        )

        # The job-xml file is looked up under the "hdfs" subdirectory.
        parse_mock.assert_called_once_with("/tmp/no-error-path/hdfs/aaa.xml")
        self.assertEqual(result, {"KEY1": "VALUE1", "KEY2": "VALUE2"})
    def test_parse_els_multiple_line_with_back_references(self):
        # Should remain unchanged, as the conversion from a comma-separated string to a List will
        # occur before writing to file.
        prop_file = tempfile.NamedTemporaryFile("w", delete=False)
        prop_file.write("""
#comment
key=value,value2,${test}
key2=value
key3=refer${key2}
key4=refer${key5}
key5=test
""")
        prop_file.close()

        job_properties = {"test": "answer"}
        props = PropertySet(config={},
                            job_properties=job_properties,
                            action_node_properties={})
        expected = {
            "key": "value,value2,answer",
            "key2": "value",
            "key3": "refervalue",
            "key4": "refer${key5}",  # no forward-references
            "key5": "test",
        }
        self.assertEqual(
            expected,
            el_utils.extract_evaluate_properties(prop_file.name, props=props))
 def _get_shell_mapper(self, job_properties, config):
     """Build a ShellMapper over ``self.shell_node`` with the given properties."""
     return shell_mapper.ShellMapper(
         oozie_node=self.shell_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=job_properties, config=config),
     )
Example #4
0
    def test_convert_to_text_without_prepare_node(self):
        """The git mapper still produces its task when <prepare> is removed."""
        node = ET.fromstring(EXAMPLE_XML)
        node.remove(node.find("prepare"))
        mapper = self._get_git_mapper(node)
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        expected_task = Task(
            task_id="test_id",
            template_name="git.tpl",
            template_params={
                "git_uri": "https://github.com/apache/oozie",
                "git_branch": "my-awesome-branch",
                "destination_path": "/my_git_repo_directory",
                "key_path": "/awesome-key/",
                "props": PropertySet(
                    config={"dataproc_cluster": "my-cluster", "gcp_region": "europe-west3"},
                    job_properties={
                        "branch": "my-awesome-branch",
                        "nameNode": "hdfs://",
                        "userName": "******",
                        "examplesRoot": "examples",
                    },
                    action_node_properties={},
                ),
            },
        )
        self.assertEqual([expected_task], tasks)
        self.assertEqual([], relations)
Example #5
0
    def test_task_and_relations(self):
        """DistCp mapper yields a prepare task plus the distcp task, linked by one relation."""
        # Given
        mapper = _get_distcp_mapper(self.distcp_node,
                                    job_properties=EXAMPLE_JOB_PROPERTIES,
                                    config=EXAMPLE_CONFIG_PROPERTIES)

        # When
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        # Then
        self.assertEqual(mapper.oozie_node, self.distcp_node)
        self.assertIsNotNone(tasks)
        self.assertIsNotNone(relations)
        self.assertEqual(2, len(tasks))
        self.assertEqual(1, len(relations))
        self.assertEqual(
            [
                Task(
                    task_id="distcp_prepare",
                    template_name="prepare.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "delete": "/tmp/d_path",
                        "mkdir": None
                    },
                ),
                Task(
                    task_id="distcp",
                    template_name="distcp.tpl",
                    trigger_rule="one_success",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={
                                "nameNode1": "hdfs://localhost:8081",
                                "nameNode2": "hdfs://localhost:8082",
                            },
                            action_node_properties={
                                "oozie.launcher.mapreduce.job.hdfs-servers":
                                "{{nameNode1}} ,{{nameNode2}}"
                            },
                        ),
                        "distcp_command":
                        "--class=org.apache.hadoop.tools.DistCp -- -update -skipcrccheck "
                        "-strategy dynamic '{{nameNode1}}/path/to/input file.txt' "
                        "'{{nameNode2}}/path/to/output-file.txt'",
                    },
                ),
            ],
            tasks,
        )
        # Prepare task must run before the distcp task itself.
        self.assertEqual([
            Relation(from_task_id=f"{mapper.name}_prepare",
                     to_task_id=mapper.name)
        ], relations)
Example #6
0
    def test_should_add_end_success_workflow_node(self):
        """The transformer appends an end-success HTTP notification task group."""
        # Given
        transformer = AddWorkflowNotificationTransformer()
        workflow = Workflow(
            input_directory_path="",
            output_directory_path="",
            dag_name="DAG_NAME_B",
        )
        props = PropertySet(
            job_properties={PROP_WORKFLOW_NOTIFICATION_URL: "http://example.com/workflow"}
        )
        task_group = TaskGroup(
            name="first_task",
            tasks=[Task(task_id="first_task", template_name="dummy.tpl")],
        )
        workflow.task_groups[task_group.name] = task_group

        # When
        transformer.process_workflow_after_convert_nodes(workflow, props)

        # Then: a new group was appended and wired downstream of the first one.
        self.assertIn(END_SUCCESS_TASK_GROUP_NAME, workflow.task_groups)
        self.assertIn(END_SUCCESS_TASK_GROUP_NAME, task_group.downstream_names)
        expected_tasks = [
            Task(
                task_id=END_SUCCESS_TASK_GROUP_NAME,
                template_name="http.tpl",
                trigger_rule="one_success",
                template_params={"url": "http://example.com/workflow"},
            )
        ]
        self.assertEqual(
            expected_tasks,
            workflow.task_groups[END_SUCCESS_TASK_GROUP_NAME].tasks,
        )
 def test_extract_param_values_from_action_node_should_support_el_value(
         self):
     """EL expressions inside <param> values are resolved from job properties.

     Fix: ``userName`` must be "TEST_USERNAME" — the expected dict below
     interpolates TEST_USERNAME, but the value had been clobbered to
     "******" (a masking artifact), which made the assertion fail.
     """
     props = PropertySet(config={},
                         job_properties={
                             "userName": "TEST_USERNAME",
                             "examplesRoot": "TEST_EXAMPLE_ROOT"
                         })
     # language=XML
     xml_content = """
     <fragment>
         <param>INPUT=/user/${userName}/${examplesRoot}/apps/hive/input/</param>
         <param>OUTPUT=/user/${userName}/${examplesRoot}/apps/hive/output/</param>
     </fragment>
     """
     node = ET.fromstring(xml_content)
     result = extract_param_values_from_action_node(node, props=props)
     self.assertEqual(
         {
             "INPUT":
             "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/hive/input/",
             "OUTPUT":
             "/user/TEST_USERNAME/TEST_EXAMPLE_ROOT/apps/hive/output/",
         },
         result,
     )
    def test_create_workflow_file_should_format_file(self, open_mock,
                                                     render_template_mock,
                                                     sort_imports_mock,
                                                     autoflake_fix_file_mock,
                                                     black_mock):
        """After rendering, the generated DAG file is formatted in place.

        black, autoflake and the import sorter are all mocked in; the test
        only verifies that each formatter is invoked once with the expected
        arguments on the rendered file.
        """
        renderer = self._create_renderer()
        workflow = _create_workflow()
        props = PropertySet(config=dict(), job_properties=dict())

        renderer.create_workflow_file(workflow, props=props)
        # black formats the file; exact mode/fast/write_back flags are not pinned.
        black_mock.format_file_in_place.assert_called_once_with(
            Path("/tmp/output/DAG_NAME.py"),
            fast=mock.ANY,
            mode=mock.ANY,
            write_back=mock.ANY)
        # autoflake removes unused imports/variables in place.
        autoflake_fix_file_mock.assert_called_once_with(
            "/tmp/output/DAG_NAME.py",
            args=AutoflakeArgs(
                remove_all_unused_imports=True,
                ignore_init_module_imports=False,
                remove_duplicate_keys=False,
                remove_unused_variables=True,
                in_place=True,
                imports=None,
                expand_star_imports=False,
                check=False,
            ),
            standard_out=sys.stdout,
        )
        sort_imports_mock.assert_called_once_with("/tmp/output/DAG_NAME.py")
 def __init__(
     self,
     dag_name: str,
     input_directory_path: str,
     output_directory_path: str,
     action_mapper: Dict[str, Type[ActionMapper]],
     renderer: BaseRenderer,
     transformers: List[BaseWorkflowTransformer] = None,
     user: str = None,
     initial_props: PropertySet = None,
 ):
     """Set up the conversion pipeline: workflow shell, props, parsers.

     :param dag_name: name of the Airflow DAG to generate
     :param input_directory_path: directory holding the Oozie workflow
     :param output_directory_path: directory for the generated files
     :param action_mapper: mapping of Oozie action tag -> mapper class
     :param renderer: renderer producing the output files
     :param transformers: optional workflow transformers (default: none)
     :param user: value for the "user.name" property; falls back to $USER
     :param initial_props: optional property set whose job properties seed
         ``self.props`` (copied, not aliased)
     """
     self.workflow = Workflow(
         dag_name=dag_name,
         input_directory_path=input_directory_path,
         output_directory_path=output_directory_path,
     )
     self.renderer = renderer
     self.transformers = transformers or []
     # Propagate the configuration in case initial property set is passed.
     # Fix: copy the dict so that adding "user.name" below does not mutate
     # the caller's ``initial_props.job_properties`` (it was aliased before).
     job_properties = dict(initial_props.job_properties) if initial_props else {}
     # NOTE(review): raises KeyError when neither ``user`` nor $USER is set.
     job_properties["user.name"] = user or os.environ["USER"]
     self.props = PropertySet(job_properties=job_properties)
     self.property_parser = PropertyParser(props=self.props,
                                           workflow=self.workflow)
     self.parser = parser.OozieParser(props=self.props,
                                      action_mapper=action_mapper,
                                      renderer=self.renderer,
                                      workflow=self.workflow)
Example #10
0
 def test_arguments_are_parsed_correctly_without_jar_files(self):
     """Java mapper parse: name, main class, opts and props set; no jar files."""
     mapper = self._get_java_mapper(
         job_properties={
             "userName":
             "******",
             "oozie.wf.application.path":
             "hdfs:///user/USER/examples/apps/java",
         },
         config={},
     )
     mapper.on_parse_node()
     self.assertEqual("test_id", mapper.name)
     self.assertEqual("org.apache.oozie.example.DemoJavaMain",
                      mapper.main_class)
     self.assertEqual(["-Dtest1=val1", "-Dtest2=val2"], mapper.java_opts)
     # Job properties pass through unchanged; the action node contributes
     # only the queue-name property.
     self.assertEqual(
         PropertySet(
             config={},
             job_properties={
                 "userName":
                 "******",
                 "oozie.wf.application.path":
                 "hdfs:///user/USER/examples/apps/java",
             },
             action_node_properties={
                 "mapred.job.queue.name": "${queueName}"
             },
         ),
         mapper.props,
     )
     self.assertEqual([], mapper.jar_files_in_hdfs)
     self.assertEqual([], mapper.jar_files)
Example #11
0
    def test_with_prepare(self):
        """A <prepare> section yields a prepare task with joined delete/mkdir paths.

        The ${nameNode} prefix is stripped from each path; multiple paths of
        the same kind are space-joined into a single template parameter.
        """
        cluster = "my-cluster"
        region = "europe-west3"
        job_properties = {"nameNode": "hdfs://localhost:8020"}
        config = {"dataproc_cluster": cluster, "gcp_region": region}
        # language=XML
        pig_node_prepare_str = """
<pig>
    <name-node>hdfs://</name-node>
    <prepare>
        <delete path="${nameNode}/examples/output-data/demo/pig-node" />
        <delete path="${nameNode}/examples/output-data/demo/pig-node2" />
        <mkdir path="${nameNode}/examples/input-data/demo/pig-node" />
        <mkdir path="${nameNode}/examples/input-data/demo/pig-node2" />
    </prepare>
</pig>
"""
        pig_node_prepare = ET.fromstring(pig_node_prepare_str)
        extension = self.get_mapper_extension(
            pig_node_prepare, props=PropertySet(config=config, job_properties=job_properties)
        )
        self.assertTrue(extension.has_prepare())
        task = extension.get_prepare_task()
        self.assertEqual(
            Task(
                task_id="mapper_prepare",
                template_name="prepare.tpl",
                template_params={
                    "delete": "/examples/output-data/demo/pig-node /examples/output-data/demo/pig-node2",
                    "mkdir": "/examples/input-data/demo/pig-node /examples/input-data/demo/pig-node2",
                },
            ),
            task,
        )
 def test_parse_els_no_file(self):
     """Parsing with no file path yields an empty property dict."""
     property_set = PropertySet(
         job_properties={"key": "value"},
         config={},
         action_node_properties={},
     )
     result = el_utils.parse_els(None, props=property_set)
     self.assertEqual({}, result)
    def test_to_tasks_and_relations(self):
        """SSH mapper produces a single ssh.tpl task and no relations."""
        mapper = self._get_ssh_mapper(job_properties={}, config={})

        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            [
                Task(
                    task_id="test_id",
                    template_name="ssh.tpl",
                    template_params={
                        "props":
                        PropertySet(config={},
                                    job_properties={},
                                    action_node_properties={}),
                        "command":
                        "'ls -l -a'",
                        "user":
                        "******",
                        "host":
                        "apache.org",
                    },
                )
            ],
            tasks,
        )
        self.assertEqual(relations, [])
Example #14
0
 def _get_mapreduce_mapper(self, job_properties, config):
     """Build a MapReduceMapper over ``self.mapreduce_node`` with the given props."""
     return mapreduce_mapper.MapReduceMapper(
         oozie_node=self.mapreduce_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=job_properties, config=config),
     )
Example #15
0
def _get_fs_mapper(oozie_node):
    """Create an FsMapper for *oozie_node* with a minimal property set."""
    minimal_props = PropertySet(job_properties={"nameNode": "hdfs://"}, config={})
    return fs_mapper.FsMapper(
        oozie_node=oozie_node,
        name="test_id",
        dag_name="DAG_NAME_B",
        props=minimal_props,
    )
Example #16
0
 def test_to_tasks_and_relations(self):
     """Email mapper renders a single email.tpl task with all address fields."""
     mapper = self._get_email_mapper(job_properties={"userName": "******"},
                                     config={})
     mapper.on_parse_node()
     tasks, relations = mapper.to_tasks_and_relations()
     self.assertEqual(
         [
             Task(
                 task_id="test_id",
                 template_name="email.tpl",
                 trigger_rule="one_success",
                 template_params={
                     "props":
                     PropertySet(config={},
                                 job_properties={"userName": "******"},
                                 action_node_properties={}),
                     "to_addr":
                     "[email protected],[email protected]",
                     "cc_addr":
                     "[email protected],[email protected]",
                     "bcc_addr":
                     "[email protected],[email protected]",
                     "subject":
                     "Email notifications for {{run_id}}",
                     "body":
                     "Hi {{userName}} , the wf {{run_id}} successfully "
                     "completed. Bye {{userName}}",
                 },
             )
         ],
         tasks,
     )
     self.assertEqual(relations, [])
    def test_to_tasks_and_relations(self):
        """Pig mapper yields a prepare task and the pig task, linked by one relation."""
        job_properties = {"nameNode": "hdfs://"}
        config = {
            "dataproc_cluster": "my-cluster",
            "gcp_region": "europe-west3"
        }
        mapper = self._get_pig_mapper(job_properties=job_properties,
                                      config=config)
        mapper.on_parse_node()
        tasks, relations = mapper.to_tasks_and_relations()

        self.assertEqual(
            tasks,
            [
                Task(
                    task_id="test_id_prepare",
                    template_name="prepare.tpl",
                    template_params={
                        "delete":
                        "/examples/output-data/demo/pig-node /examples/output-data/demo/pig-node2",
                        "mkdir":
                        "/examples/input-data/demo/pig-node /examples/input-data/demo/pig-node2",
                    },
                ),
                Task(
                    task_id="test_id",
                    template_name="pig.tpl",
                    template_params={
                        "props":
                        PropertySet(
                            config={
                                "dataproc_cluster": "my-cluster",
                                "gcp_region": "europe-west3"
                            },
                            job_properties={"nameNode": "hdfs://"},
                            action_node_properties={
                                "mapred.job.queue.name": "${queueName}",
                                "mapred.map.output.compress": "false",
                            },
                        ),
                        "params_dict": {
                            "INPUT":
                            "/user/${wf:user()}/${examplesRoot}/input-data/text",
                            "OUTPUT":
                            "/user/${wf:user()}/${examplesRoot}/output-data/demo/pig-node",
                        },
                        "script_file_name":
                        "id.pig",
                        "action_node_properties": {
                            "mapred.job.queue.name": "${queueName}",
                            "mapred.map.output.compress": "false",
                        },
                    },
                ),
            ],
        )
        # Prepare task must run before the pig task itself.
        self.assertEqual(
            [Relation(from_task_id="test_id_prepare", to_task_id="test_id")],
            relations)
Example #18
0
 def _get_shell_mapper(self, job_properties, config):
     """Build a ShellMapper over ``self.shell_node``, pointing at the test input dir."""
     return shell_mapper.ShellMapper(
         oozie_node=self.shell_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=job_properties, config=config),
         input_directory_path="/tmp/input-directory-path/",
     )
Example #19
0
 def _get_start_mapper(self, name="test_id"):
     """Build a StartMapper named *name* with empty properties."""
     return StartMapper(
         oozie_node=self.oozie_node,
         name=name,
         dag_name="DAG_NAME_B",
         props=PropertySet(config={}, job_properties={}),
     )
 def test_prepare_move_command(self, xml, command):
     """Parametrized check: prepare_move_command renders *command* for *xml*."""
     node = ET.fromstring(xml)
     self.assertEqual(
         command,
         fs_mapper.prepare_move_command(
             node, props=PropertySet(job_properties=TEST_JOB_PROPS, config=TEST_CONFIG)
         ),
     )
def _get_fs_mapper(oozie_node):
    """Create an FsMapper for *oozie_node*, pointing at the test input dir."""
    minimal_props = PropertySet(job_properties={"nameNode": "hdfs://"}, config={})
    return fs_mapper.FsMapper(
        oozie_node=oozie_node,
        name="test_id",
        dag_name="DAG_NAME_B",
        props=minimal_props,
        input_directory_path="/tmp/input-directory-path/",
    )
Example #22
0
 def _get_git_mapper(spark_node):
     """Wrap *spark_node* in a GitMapper using the example props/config."""
     return git_mapper.GitMapper(
         oozie_node=spark_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=EXAMPLE_JOB_PROPS, config=EXAMPLE_CONFIG),
     )
 def _get_pig_mapper(self, job_properties, config):
     """Build a PigMapper over ``self.pig_node`` with the given properties."""
     return pig_mapper.PigMapper(
         oozie_node=self.pig_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=job_properties, config=config),
     )
 def get_child_props(self) -> PropertySet:
     """Return this node's props when <propagate-configuration> is present,
     otherwise an empty property set."""
     node = self.oozie_node.find("propagate-configuration")
     # Compare against None explicitly: Element's __bool__() is
     # `len(self._children) != 0`, and <propagate-configuration> is an empty
     # tag, so a plain truthiness test would always be False.
     if node is None:
         return PropertySet(config={}, job_properties={})
     return self.props
    def test_extract_properties_from_configuration_node_when_empty(self):
        # language=XML
        config_node_str = """
    <configuration>
    </configuration>
"""
        config_node = ET.fromstring(config_node_str)
        properties = extract_properties_from_configuration_node(config_node, props=PropertySet())
        self.assertEqual(properties, {})
 def test_normalize_path_red_path(self, oozie_path):
     """normalize_path raises ParseException for unrecognised path formats."""
     props = PropertySet(
         config={"dataproc_cluster": "my-cluster", "gcp_region": "europe-west3"},
         job_properties={"nameNode": "hdfs://localhost:8020"},
     )
     with self.assertRaisesRegex(ParseException, "Unknown path format. "):
         normalize_path(oozie_path, props=props)
Example #27
0
 def _get_spark_mapper(spark_node):
     """Wrap *spark_node* in a SparkMapper using the example props/config."""
     return spark_mapper.SparkMapper(
         oozie_node=spark_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=PropertySet(job_properties=EXAMPLE_JOB_PROPS, config=EXAMPLE_CONFIG),
         input_directory_path="/tmp/input-directory-path/",
     )
Example #28
0
 def _get_email_mapper(self, job_properties, config):
     """Build an EmailMapper over ``self.email_node`` with the given properties."""
     return email_mapper.EmailMapper(
         oozie_node=self.email_node,
         name="test_id",
         dag_name="DAG_NAME_A",
         props=PropertySet(job_properties=job_properties, config=config),
         input_directory_path="/tmp/input-directory-path/",
     )
Example #29
0
 def setUp(self):
     """Common fixture: job properties and PropertySet shared by the pig tests."""
     self.job_properties = {
         "nameNode": "hdfs://",
         "oozie.wf.application.path":
         "hdfs:///user/pig/examples/pig_test_node",
     }
     self.props = PropertySet(job_properties=self.job_properties,
                              config={},
                              action_node_properties={})
Example #30
0
 def setUp(self):
     """Build an OozieParser over the demo workflow with a mocked renderer."""
     props = PropertySet(job_properties={}, config={})
     workflow = Workflow(input_directory_path=EXAMPLE_DEMO_PATH,
                         output_directory_path="/tmp",
                         dag_name="DAG_NAME_B")
     self.parser = parser.OozieParser(workflow=workflow,
                                      props=props,
                                      action_mapper=ACTION_MAP,
                                      renderer=mock.MagicMock())