def test_replace_el(self):
    """Check that ``${var}`` EL references in archive paths come out as ``{{var}}``."""
    # Given
    for key, value in (("var1", "value1"), ("var2", "value2")):
        self.props.job_properties[key] = value
    # language=XML
    pig_xml = (
        " <pig>"
        " <archive>/path/with/el/${var1}.tar</archive>"
        " <archive>/path/with/el/${var2}.tar</archive>"
        " <archive>/path/with/two/els/${var1}/${var2}.tar</archive>"
        " </pig> "
    )
    extractor = ArchiveExtractor(oozie_node=ET.fromstring(pig_xml), props=self.props)
    expected_archives = [
        "hdfs:///path/with/el/{{var1}}.tar",
        "hdfs:///path/with/el/{{var2}}.tar",
        "hdfs:///path/with/two/els/{{var1}}/{{var2}}.tar",
    ]

    # When
    extractor.parse_node()

    # Then
    self.assertEqual(expected_archives, extractor.hdfs_archives)
def __init__(
    self,
    oozie_node: ET.Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params: Dict[str, str] = None,
    **kwargs,
):
    """
    Initialise the mapper state.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param trigger_rule: trigger rule forwarded to ``ActionMapper`` and stored on the instance.
    :param params: optional parameters; an empty dict is used when none (or an empty one) is given.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)
    self.params = params if params else {}
    self.trigger_rule = trigger_rule

    # Values that start out empty and are filled in later.
    self.java_class = self.java_jar = ""
    self.job_name = None
    self.prepare_command = None
    self.properties = {}
    self.jars, self.application_args = [], []
    self.hdfs_files, self.hdfs_archives, self.dataproc_jars = [], [], []

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, params=self.params)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=self.params)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    props: PropertySet,
    jar_files: List[str],
    **kwargs,
):
    """
    Initialise the mapper state.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param dag_name: DAG name forwarded to ``ActionMapper``.
    :param props: property set forwarded to ``ActionMapper``.
    :param jar_files: jar file names; a falsy value is replaced by an empty list.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(
        self, oozie_node=oozie_node, name=name, dag_name=dag_name, props=props, **kwargs
    )

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    # Values that are not known yet at construction time.
    self.main_class: Optional[str] = None
    self.java_opts: List[str] = []
    self.args: Optional[List[str]] = None
    self.hdfs_files: Optional[List[str]] = None
    self.hdfs_archives: Optional[List[str]] = None

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

    # The jar lists must exist before the HDFS paths are computed below.
    self.jar_files: List[str] = jar_files or []
    self.jar_files_in_hdfs: List[str] = []
    self._get_jar_files_in_hdfs_full_paths()
def test_replace_el(self):
    """Check that ``${var}`` EL references in archive paths are replaced by the parameter values."""
    # Given
    el_params = {"var1": "value1", "var2": "value2", **self.default_params}
    # language=XML
    pig_xml = (
        " <pig>"
        " <archive>/path/with/el/${var1}.tar</archive>"
        " <archive>/path/with/el/${var2}.tar</archive>"
        " <archive>/path/with/two/els/${var1}/${var2}.tar</archive>"
        " </pig> "
    )
    extractor = ArchiveExtractor(oozie_node=ET.fromstring(pig_xml), params=el_params)
    expected_archives = [
        "hdfs:///path/with/el/value1.tar",
        "hdfs:///path/with/el/value2.tar",
        "hdfs:///path/with/two/els/value1/value2.tar",
    ]

    # When
    extractor.parse_node()

    # Then
    self.assertEqual(expected_archives, extractor.hdfs_archives)
def test_add_absolute_archive(self):
    """An absolute archive path is stored unchanged and gets the ``hdfs://`` prefix."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, props=self.props)

    # When
    extractor.add_archive("/test_archive.zip")

    # Then
    expected_archives = ["/test_archive.zip"]
    expected_hdfs_archives = ["hdfs:///test_archive.zip"]
    self.assertEqual(expected_archives, extractor.archives)
    self.assertEqual(expected_hdfs_archives, extractor.hdfs_archives)
def test_add_absolute_archive(self):
    """An absolute archive path is stored unchanged and gets the ``hdfs://`` prefix."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, params=self.default_params)

    # When
    extractor.add_archive("/test_archive.zip")

    # Then
    expected_archives = ["/test_archive.zip"]
    expected_hdfs_archives = ["hdfs:///test_archive.zip"]
    self.assertEqual(extractor.archives, expected_archives)
    self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)
def test_add_archive_extra_hash(self):
    """Adding a path with more than one ``#`` raises with an explanatory message."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, props=self.props)
    expected_message = "There should be maximum one '#' in the path /test_archive.zip#4rarear#"

    # When
    with self.assertRaises(Exception) as context:
        extractor.add_archive("/test_archive.zip#4rarear#")

    # Then
    actual_message = str(context.exception)
    self.assertEqual(expected_message, actual_message)
def test_add_relative_archive(self):
    """A relative archive path is stored unchanged and resolved to a full HDFS path."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, props=self.props)

    # When
    extractor.add_archive("test_archive.zip")

    # Then
    expected_archives = ["test_archive.zip"]
    expected_hdfs_archives = ["hdfs:///user/pig/examples/pig_test_node/test_archive.zip"]
    self.assertEqual(expected_archives, extractor.archives)
    self.assertEqual(expected_hdfs_archives, extractor.hdfs_archives)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    props: PropertySet,
    **kwargs,
):
    """
    Initialise the mapper state.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param dag_name: DAG name forwarded to ``ActionMapper``.
    :param props: property set forwarded to ``ActionMapper``.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        dag_name=dag_name,
        name=name,
        props=props,
        **kwargs,
    )

    # Values that are not known yet at construction time.
    self.params_dict: Dict[str, str] = {}
    self.name_node: Optional[str] = None
    self.hdfs_files: Optional[List[str]] = None
    self.hdfs_archives: Optional[List[str]] = None

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: ET.Element,
    name: str,
    props: PropertySet,
    **kwargs,
):
    """
    Initialise the mapper state.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param props: property set forwarded to ``ActionMapper``.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, props=props, name=name, **kwargs)

    # Scalar values, unknown until they are filled in later.
    self.java_class: Optional[str] = None
    self.java_jar: Optional[str] = None
    self.job_name: Optional[str] = None

    # Collections that start out empty.
    self.jars: List[str] = []
    self.application_args: List[str] = []
    self.hdfs_files: List[str] = []
    self.hdfs_archives: List[str] = []
    self.dataproc_jars: List[str] = []
    self.spark_opts: Dict[str, str] = {}

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    props: PropertySet,
    **kwargs,
):
    """
    Initialise the mapper state and parse the node right away.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param props: property set forwarded to ``ActionMapper``.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, props=props, name=name, **kwargs)

    self.params_dict: Dict[str, str] = {}

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    # Parsing runs before the prepare extension is created.
    self._parse_oozie_node()
    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    props: PropertySet,
    **kwargs,
):
    """
    Initialise the mapper state.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param props: property set forwarded to ``ActionMapper``.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, props=props, name=name, **kwargs)

    # Values that are not known yet at construction time.
    self.variables: Optional[Dict[str, str]] = None
    self.script: Optional[str] = None
    self.query: Optional[str] = None
    self.hdfs_archives: Optional[List[str]] = None
    self.hdfs_files: Optional[List[str]] = None

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params=None,
    **kwargs,
):
    """
    Initialise the mapper state and parse the node right away.

    :param oozie_node: XML element handed to the base class and to both extractors.
    :param name: name forwarded to ``ActionMapper``.
    :param trigger_rule: trigger rule forwarded to ``ActionMapper`` and stored on the instance.
    :param params: optional parameters; a new empty dict is used when ``None`` is given.
    :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, name=name, trigger_rule=trigger_rule, **kwargs)

    # Only ``None`` is replaced; an empty mapping passed by the caller is kept as is.
    params = dict() if params is None else params
    self.params = params
    self.trigger_rule = trigger_rule
    self.properties, self.params_dict = {}, {}

    # Helpers for the file and archive entries of the node.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, params=params)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=params)

    self._parse_oozie_node()
def test_add_multiple_archives(self):
    """Several archives keep their insertion order in both the raw and the HDFS lists."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, props=self.props)
    archive_paths = ["/test_archive.zip", "test_archive2.tar", "/test_archive3.tar.gz"]
    expected_hdfs_archives = [
        "hdfs:///test_archive.zip",
        "hdfs:///user/pig/examples/pig_test_node/test_archive2.tar",
        "hdfs:///test_archive3.tar.gz",
    ]

    # When
    for archive_path in archive_paths:
        extractor.add_archive(archive_path)

    # Then
    self.assertEqual(extractor.archives, archive_paths)
    self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)
def test_add_hash_archives(self):
    """Archives with a single ``#link`` suffix keep that suffix in both lists."""
    # Given
    fake_node = Element("fake")
    extractor = ArchiveExtractor(oozie_node=fake_node, params=self.default_params)
    archive_paths = [
        "/test_archive.zip#test3_link",
        "test_archive2.tar#test_link",
        "/test_archive3.tar.gz",
    ]
    expected_hdfs_archives = [
        "hdfs:///test_archive.zip#test3_link",
        "hdfs:///user/pig/examples/pig_test_node/test_archive2.tar#test_link",
        "hdfs:///test_archive3.tar.gz",
    ]

    # When
    for archive_path in archive_paths:
        extractor.add_archive(archive_path)

    # Then
    self.assertEqual(extractor.archives, archive_paths)
    self.assertEqual(extractor.hdfs_archives, expected_hdfs_archives)