def test_replace_el(self):
        # Checks that ${var} references inside <archive> paths come out of
        # parse_node() as {{var}} in the resulting hdfs_archives entries.
        # Given
        for key, value in (("var1", "value1"), ("var2", "value2")):
            self.props.job_properties[key] = value
        # language=XML
        pig_xml = """
<pig>
    <archive>/path/with/el/${var1}.tar</archive>
    <archive>/path/with/el/${var2}.tar</archive>
    <archive>/path/with/two/els/${var1}/${var2}.tar</archive>
</pig>
        """
        extractor = ArchiveExtractor(
            oozie_node=ET.fromstring(pig_xml),
            props=self.props,
        )
        # When
        extractor.parse_node()
        # Then
        expected_archives = [
            "hdfs:///path/with/el/{{var1}}.tar",
            "hdfs:///path/with/el/{{var2}}.tar",
            "hdfs:///path/with/two/els/{{var1}}/{{var2}}.tar",
        ]
        self.assertEqual(expected_archives, extractor.hdfs_archives)
예제 #2
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params: Dict[str, str] = None,
     **kwargs,
 ):
     """Initialise the mapper state for a single Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param trigger_rule: forwarded to ``ActionMapper.__init__`` and stored
         on the instance.
     :param params: parameters shared with both extractors; a falsy value
         (``None`` or an empty dict) is replaced by a new empty dict.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)
     self.params = params if params else {}
     self.trigger_rule = trigger_rule

     # Fields start empty; nothing in this constructor fills them in.
     self.java_class = self.java_jar = ""
     self.job_name = None
     self.jars, self.properties, self.application_args = [], {}, []

     # Both extractors are built from the same node and parameters.
     extractor_args = {"oozie_node": oozie_node, "params": self.params}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     self.prepare_command = None
     self.hdfs_files, self.hdfs_archives, self.dataproc_jars = [], [], []
예제 #3
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     props: PropertySet,
     jar_files: List[str],
     **kwargs,
 ):
     """Initialise the mapper state for a single Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param dag_name: forwarded to ``ActionMapper.__init__``.
     :param props: forwarded to ``ActionMapper.__init__``; the extractors
         receive ``self.props`` afterwards.
     :param jar_files: jar file names; a falsy value becomes an empty list.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         dag_name=dag_name,
         name=name,
         props=props,
         **kwargs,
     )

     # Both extractors are built from the same node and property set.
     extractor_args = {"oozie_node": oozie_node, "props": self.props}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     # Fields start unset; nothing in this constructor fills them in.
     self.main_class: Optional[str] = None
     self.java_opts: List[str] = []
     self.args: Optional[List[str]] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

     self.jar_files: List[str] = jar_files or []
     self.jar_files_in_hdfs: List[str] = []
     # NOTE(review): presumably populates jar_files_in_hdfs from jar_files;
     # the helper is not visible here - confirm.
     self._get_jar_files_in_hdfs_full_paths()
    def test_replace_el(self):
        # Given
        params = {"var1": "value1", "var2": "value2", **self.default_params}
        # language=XML
        node_str = """
<pig>
    <archive>/path/with/el/${var1}.tar</archive>
    <archive>/path/with/el/${var2}.tar</archive>
    <archive>/path/with/two/els/${var1}/${var2}.tar</archive>
</pig>
        """
        oozie_node = ET.fromstring(node_str)
        archive_extractor = ArchiveExtractor(oozie_node=oozie_node,
                                             params=params)
        # When
        archive_extractor.parse_node()
        # Then
        self.assertEqual(
            [
                "hdfs:///path/with/el/value1.tar",
                "hdfs:///path/with/el/value2.tar",
                "hdfs:///path/with/two/els/value1/value2.tar",
            ],
            archive_extractor.hdfs_archives,
        )
예제 #5
0
 def test_add_absolute_archive(self):
     # Checks that an absolute path is recorded unchanged in `archives` and
     # with an hdfs:// prefix in `hdfs_archives`.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         props=self.props,
     )
     # When
     extractor.add_archive("/test_archive.zip")
     # Then
     expected_archives = ["/test_archive.zip"]
     expected_hdfs_archives = ["hdfs:///test_archive.zip"]
     self.assertEqual(expected_archives, extractor.archives)
     self.assertEqual(expected_hdfs_archives, extractor.hdfs_archives)
 def test_add_absolute_archive(self):
     # Checks that an absolute path is recorded unchanged in `archives` and
     # with an hdfs:// prefix in `hdfs_archives`.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         params=self.default_params,
     )
     # When
     extractor.add_archive("/test_archive.zip")
     # Then
     expected_archives = ["/test_archive.zip"]
     expected_hdfs_archives = ["hdfs:///test_archive.zip"]
     self.assertEqual(extractor.archives, expected_archives)
     self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)
예제 #7
0
 def test_add_archive_extra_hash(self):
     # Checks that a path containing more than one '#' is rejected with the
     # exact error message asserted below.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         props=self.props,
     )
     # When
     with self.assertRaises(Exception) as raised:
         extractor.add_archive("/test_archive.zip#4rarear#")
     # Then
     expected_message = "There should be maximum one '#' in the path /test_archive.zip#4rarear#"
     self.assertEqual(expected_message, str(raised.exception))
예제 #8
0
 def test_add_relative_archive(self):
     # Checks that a relative path is recorded unchanged in `archives` and
     # expanded to the full hdfs:// path asserted below in `hdfs_archives`.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         props=self.props,
     )
     # When
     extractor.add_archive("test_archive.zip")
     # Then
     expected_archives = ["test_archive.zip"]
     expected_hdfs_archives = [
         "hdfs:///user/pig/examples/pig_test_node/test_archive.zip",
     ]
     self.assertEqual(expected_archives, extractor.archives)
     self.assertEqual(expected_hdfs_archives, extractor.hdfs_archives)
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     props: PropertySet,
     **kwargs,
 ):
     """Initialise the mapper state for a single Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param dag_name: forwarded to ``ActionMapper.__init__``.
     :param props: forwarded to ``ActionMapper.__init__``; the extractors
         receive ``self.props`` afterwards.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         dag_name=dag_name,
         props=props,
         **kwargs,
     )
     self.params_dict: Dict[str, str] = {}

     # Both extractors are built from the same node and property set.
     extractor_args = {"oozie_node": oozie_node, "props": self.props}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     # Fields start unset; nothing in this constructor fills them in.
     self.name_node: Optional[str] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
예제 #10
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     props: PropertySet,
     **kwargs,
 ):
     """Initialise the mapper state for a single Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param props: forwarded to ``ActionMapper.__init__``; the extractors
         receive ``self.props`` afterwards.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs,
     )

     # Fields start empty; nothing in this constructor fills them in.
     self.java_class: Optional[str] = None
     self.java_jar: Optional[str] = None
     self.job_name: Optional[str] = None
     self.jars: List[str] = []
     self.application_args: List[str] = []

     # Both extractors are built from the same node and property set.
     extractor_args = {"oozie_node": oozie_node, "props": self.props}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     self.hdfs_files: List[str] = []
     self.hdfs_archives: List[str] = []
     self.dataproc_jars: List[str] = []
     self.spark_opts: Dict[str, str] = {}

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
예제 #11
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     props: PropertySet,
     **kwargs,
 ):
     """Initialise the mapper and parse its Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param props: forwarded to ``ActionMapper.__init__``; the extractors
         receive ``self.props`` afterwards.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs,
     )
     self.params_dict: Dict[str, str] = {}

     # Both extractors are built from the same node and property set.
     extractor_args = {"oozie_node": oozie_node, "props": self.props}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     # Parsing runs after the extractors exist and before the prepare
     # extension is created; this order is kept as in the original.
     self._parse_oozie_node()
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
예제 #12
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     props: PropertySet,
     **kwargs,
 ):
     """Initialise the mapper state for a single Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param props: forwarded to ``ActionMapper.__init__``; the extractors
         receive ``self.props`` afterwards.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs,
     )

     # Fields start unset; nothing in this constructor fills them in.
     self.variables: Optional[Dict[str, str]] = None
     self.query: Optional[str] = None
     self.script: Optional[str] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     # Both extractors are built from the same node and property set.
     extractor_args = {"oozie_node": oozie_node, "props": self.props}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
예제 #13
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params=None,
     **kwargs,
 ):
     """Initialise the mapper and parse its Oozie action node.

     :param oozie_node: XML element of the action; also handed to the file
         and archive extractors.
     :param name: forwarded to ``ActionMapper.__init__``.
     :param trigger_rule: forwarded to ``ActionMapper.__init__`` and stored
         on the instance.
     :param params: parameters shared with both extractors; ``None`` is
         replaced by a new empty dict, any other value is used as given.
     :param kwargs: passed through unchanged to ``ActionMapper.__init__``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         trigger_rule=trigger_rule,
         **kwargs,
     )
     # Only None triggers the default, so an empty dict passed by the
     # caller is kept as the same object.
     params = {} if params is None else params
     self.params = params
     self.trigger_rule = trigger_rule
     # Two separate dict objects, one per attribute.
     self.properties, self.params_dict = {}, {}

     extractor_args = {"oozie_node": oozie_node, "params": params}
     self.file_extractor = FileExtractor(**extractor_args)
     self.archive_extractor = ArchiveExtractor(**extractor_args)

     self._parse_oozie_node()
예제 #14
0
 def test_add_multiple_archives(self):
     # Checks that several archives added in sequence are all recorded, in
     # insertion order, in both `archives` and `hdfs_archives`.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         props=self.props,
     )
     # When
     for path in ("/test_archive.zip", "test_archive2.tar", "/test_archive3.tar.gz"):
         extractor.add_archive(path)
     # Then
     expected_archives = [
         "/test_archive.zip",
         "test_archive2.tar",
         "/test_archive3.tar.gz",
     ]
     expected_hdfs_archives = [
         "hdfs:///test_archive.zip",
         "hdfs:///user/pig/examples/pig_test_node/test_archive2.tar",
         "hdfs:///test_archive3.tar.gz",
     ]
     self.assertEqual(extractor.archives, expected_archives)
     self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)
 def test_add_hash_archives(self):
     # Checks that a single '#suffix' on an archive path is kept in both
     # `archives` and `hdfs_archives`, alongside a path without one.
     # Given
     extractor = ArchiveExtractor(
         oozie_node=Element("fake"),
         params=self.default_params,
     )
     # When
     for path in (
         "/test_archive.zip#test3_link",
         "test_archive2.tar#test_link",
         "/test_archive3.tar.gz",
     ):
         extractor.add_archive(path)
     # Then
     expected_archives = [
         "/test_archive.zip#test3_link",
         "test_archive2.tar#test_link",
         "/test_archive3.tar.gz",
     ]
     expected_hdfs_archives = [
         "hdfs:///test_archive.zip#test3_link",
         "hdfs:///user/pig/examples/pig_test_node/test_archive2.tar#test_link",
         "hdfs:///test_archive3.tar.gz",
     ]
     self.assertEqual(extractor.archives, expected_archives)
     self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)