Exemple #1
0
 def _parse_oozie_node(self):
     """
     Populates the mapper attributes from ``self.oozie_node``.

     The text of the ``resource-manager``, ``name-node`` and ``script`` tags is
     passed through ``el_utils.replace_el_with_var`` (unquoted, using
     ``self.params``). Afterwards the configuration and params are parsed and the
     file/archive extractors are run.
     """

     def resolve(raw_text):
         return el_utils.replace_el_with_var(raw_text, params=self.params, quote=False)

     # All three tag texts are read before any attribute is assigned.
     raw_resource_manager = self.oozie_node.find("resource-manager").text
     raw_name_node = self.oozie_node.find("name-node").text
     raw_script = self.oozie_node.find("script").text

     self.resource_manager = resolve(raw_resource_manager)
     self.name_node = resolve(raw_name_node)
     self.script_file_name = resolve(raw_script)

     self._parse_config()
     self._parse_params()
     self.files, self.hdfs_files = self.file_extractor.parse_node()
     self.archives, self.hdfs_archives = self.archive_extractor.parse_node()
 def _parse_oozie_node(self):
     """
     Populates the mapper attributes from ``self.oozie_node``.

     Resolves the ``resource-manager`` and ``name-node`` tag texts with
     ``el_utils.replace_el_with_var``, then builds the command line from the
     ``exec`` tag followed by every ``argument`` tag (space separated), converts
     it with ``el_utils.convert_el_to_jinja`` and stores it both as
     ``bash_command`` and, prefixed with ``sh``, as ``pig_command``.
     """

     def resolve(raw_text):
         return el_utils.replace_el_with_var(raw_text, props=self.props, quote=False)

     raw_resource_manager = self.oozie_node.find("resource-manager").text
     raw_name_node = self.oozie_node.find("name-node").text
     self.resource_manager = resolve(raw_resource_manager)
     self.name_node = resolve(raw_name_node)

     exec_node = self.oozie_node.find("exec")
     argument_nodes = self.oozie_node.findall("argument")
     # The executable comes first, followed by its arguments in document order.
     command_parts = [exec_node.text]
     command_parts.extend(argument.text for argument in argument_nodes)
     self.bash_command = el_utils.convert_el_to_jinja(" ".join(command_parts), quote=False)
     self.pig_command = f"sh {self.bash_command}"
    def test_replace_el_with_var_var_quote(self):
        """Checks ``replace_el_with_var`` on a ``${hostname}`` expression with quoting enabled."""
        result = el_utils.replace_el_with_var("${hostname}", {"hostname": "*****@*****.**"}, quote=True)

        self.assertEqual(result, "'*****@*****.**'")
    def on_parse_node(self):
        """
        Reads the action settings from ``self.oozie_node``.

        In order: the prepare command (only when ``self.has_prepare`` is set),
        HDFS files and archives, jar / class / job name, then properties merged
        from every job-xml file, the first configuration node and the first
        spark-opts node, and finally the application arguments.
        """
        if self.has_prepare:
            self.prepare_command = self.get_prepare_command(oozie_node=self.oozie_node, params=self.params)

        _, self.hdfs_files = self.file_extractor.parse_node()
        _, self.hdfs_archives = self.archive_extractor.parse_node()

        self.java_jar = self._get_or_default(self.oozie_node, SPARK_TAG_JAR, None, params=self.params)
        self.java_class = self._get_or_default(self.oozie_node, SPARK_TAG_CLASS, None, params=self.params)
        if self.java_class and self.java_jar:
            # When both a class and a jar are given, the jar is moved to dataproc_jars.
            self.dataproc_jars, self.java_jar = [self.java_jar], None
        self.job_name = self._get_or_default(self.oozie_node, SPARK_TAG_JOB_NAME, None, params=self.params)

        # Properties are merged in this order: job-xml files, configuration, spark opts.
        for job_xml_node in xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_JOB_XML):
            job_xml_root = ET.parse(source=job_xml_node.text).getroot()
            self.properties.update(self._parse_config_node(job_xml_root))

        configuration_nodes = xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_CONFIGURATION)
        if configuration_nodes:
            first_configuration = configuration_nodes[0]
            self.properties.update(self._parse_config_node(first_configuration))

        spark_opts_nodes = xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_OPTS)
        if spark_opts_nodes:
            first_spark_opts = spark_opts_nodes[0]
            self.properties.update(self._parse_spark_opts(first_spark_opts))

        for argument_node in xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_ARGS):
            resolved_argument = el_utils.replace_el_with_var(argument_node.text, self.params, quote=False)
            self.application_args.append(resolved_argument)
 def on_parse_node(self):
     """
     Resolves the ``name-node`` tag text of ``self.oozie_node`` and then parses
     the configuration, the params and the HDFS files/archives.
     """
     name_node_element = self.oozie_node.find("name-node")
     self.name_node = el_utils.replace_el_with_var(
         name_node_element.text, params=self.params, quote=False
     )

     self._parse_config()
     self._parse_params()

     # Only the second element of each extractor result is kept.
     _, self.hdfs_files = self.file_extractor.parse_node()
     _, self.hdfs_archives = self.archive_extractor.parse_node()
def extract_properties_from_configuration_node(
        config_node: ET.Element, props: PropertySet) -> Dict[str, str]:
    """
    Extracts configuration properties from ``configuration`` node.

    Every ``TAG_PROPERTY`` child must contain a non-empty name and a non-empty
    value element. The value is passed through ``el_utils.replace_el_with_var``
    (unquoted) before being stored under the name.

    :param config_node: the configuration element to read properties from
    :param props: property set handed to the EL replacement
    :return: mapping of property name to resolved value
    :raises ParseException: when a property lacks a name or value element, or
        when either of them has no content
    """
    extracted: Dict[str, str] = {}
    for property_element in config_node.findall(TAG_PROPERTY):
        name_element = property_element.find(TAG_NAME)
        value_element = property_element.find(TAG_VALUE)

        if value_element is None or name_element is None:
            raise ParseException(
                'Element "property" should have direct children elements: name, value. One of them does not '
                "exist. Make sure the configuration element is valid.")

        key = name_element.text
        raw_value = value_element.text

        if not key:
            raise ParseException(
                'Element "name" should have content, however its value is empty. Make sure the element has '
                "the correct content.")

        if not raw_value:
            raise ParseException(
                'Element "value" should have content, however its value is empty. Make sure the element has '
                "the correct content.")

        extracted[key] = el_utils.replace_el_with_var(raw_value, props=props, quote=False)

    return extracted
 def _parse_oozie_node(self):
     """
     Populates the mapper attributes from ``self.oozie_node``.

     The ``resource-manager``, ``name-node`` and ``script`` tag texts are
     resolved with ``el_utils.replace_el_with_var`` (unquoted, using
     ``self.props``). Param values are then extracted from the action node and
     the file/archive extractors are run.
     """

     def resolve(raw_text):
         return el_utils.replace_el_with_var(raw_text, props=self.props, quote=False)

     # All three tag texts are read before any attribute is assigned.
     raw_resource_manager = self.oozie_node.find("resource-manager").text
     raw_name_node = self.oozie_node.find("name-node").text
     raw_script = self.oozie_node.find("script").text

     self.resource_manager = resolve(raw_resource_manager)
     self.name_node = resolve(raw_name_node)
     self.script_file_name = resolve(raw_script)

     self.params_dict = extract_param_values_from_action_node(self.oozie_node, props=self.props)
     self.files, self.hdfs_files = self.file_extractor.parse_node()
     self.archives, self.hdfs_archives = self.archive_extractor.parse_node()
    def test_replace_el_with_var_func_quote(self):
        """An EL function call is expected back unchanged, only wrapped in single quotes."""
        # functions shouldn't be replaced
        el_function = '${concat("abc", "def")}'

        result = el_utils.replace_el_with_var(el_function, {}, quote=True)

        self.assertEqual(result, '\'${concat("abc", "def")}\'')
 def _parse_params(self):
     """
     Collects the ``param`` nodes of ``self.oozie_node`` into ``self.params_dict``.

     Each param text is resolved with ``el_utils.replace_el_with_var`` and split
     on the first ``=`` into key and value. ``self.params_dict`` is only
     (re)assigned when at least one ``param`` node is found.

     Param nodes without text are skipped, and a param without ``=`` is stored
     with an empty value instead of raising ``ValueError``.
     """
     param_nodes = xml_utils.find_nodes_by_tag(self.oozie_node, "param")
     if not param_nodes:
         return

     self.params_dict = {}
     for node in param_nodes:
         if not node.text:
             # An empty <param/> carries no key/value pair to record.
             continue
         param = el_utils.replace_el_with_var(node.text, params=self.params, quote=False)
         # partition() never raises, unlike unpacking split("=", 1) when "=" is absent.
         key, _, value = param.partition("=")
         self.params_dict[key] = value
Exemple #10
0
 def parse_node(self):
     """
     Registers every ``archive`` child of ``self.oozie_node``.

     Each archive text is resolved with ``replace_el_with_var`` (unquoted) and
     handed to ``self.add_archive``.

     :return: tuple of ``self.archives`` and ``self.hdfs_archives``
     """
     for archive_element in self.oozie_node.findall("archive"):
         resolved_path = replace_el_with_var(archive_element.text, props=self.props, quote=False)
         self.add_archive(resolved_path)
     return self.archives, self.hdfs_archives
Exemple #11
0
    def parse_node(self):
        """
        Registers every ``file`` child of ``self.oozie_node``.

        Each file text is resolved with ``replace_el_with_var`` (unquoted) and
        handed to ``self.add_file``.

        :return: tuple of ``self.files`` and ``self.hdfs_files``
        """
        for file_element in self.oozie_node.findall("file"):
            resolved_path = replace_el_with_var(file_element.text, props=self.props, quote=False)
            self.add_file(resolved_path)

        return self.files, self.hdfs_files
    def test_replace_el_with_var_func_quote(self):
        """An EL function call is expected back unchanged, only wrapped in single quotes."""
        # functions shouldn't be replaced
        empty_props = PropertySet(job_properties={}, config={}, action_node_properties={})
        el_function = '${concat("abc", "def")}'

        result = el_utils.replace_el_with_var(el_function, props=empty_props)

        self.assertEqual(result, '\'${concat("abc", "def")}\'')
    def test_replace_el_with_var_var_quote(self):
        """Checks ``replace_el_with_var`` on a ``${hostname}`` expression with quoting enabled."""
        property_set = PropertySet(
            job_properties={"hostname": "*****@*****.**"},
            config={},
            action_node_properties={},
        )

        result = el_utils.replace_el_with_var("${hostname}", props=property_set, quote=True)

        self.assertEqual(result, "'*****@*****.**'")
 def _parse_config(self):
     """
     Copies the properties of the ``configuration`` child of ``self.oozie_node``
     into ``self.properties``.

     Each ``property`` node contributes its ``name`` text as the key and its
     ``value`` text, resolved with ``el_utils.replace_el_with_var`` (unquoted),
     as the value. Nothing happens when there is no ``configuration`` node.
     """
     config = self.oozie_node.find("configuration")
     # Compare with None explicitly: the truth value of an ElementTree Element
     # depends on whether it has children, and testing it is deprecated.
     if config is None:
         return

     property_nodes = xml_utils.find_nodes_by_tag(config, "property")
     if not property_nodes:
         return

     for node in property_nodes:
         name = node.find("name").text
         value = el_utils.replace_el_with_var(
             node.find("value").text,
             params=self.params,
             quote=False)
         self.properties[name] = value
Exemple #15
0
 def _parse_config(self):
     """
     Rebuilds ``self.props.action_node_properties`` from the ``configuration``
     child of ``self.oozie_node``.

     Each ``property`` node contributes its ``name`` text as the key and its
     ``value`` text, resolved with ``el_utils.replace_el_with_var`` (unquoted),
     as the value. The attribute is always assigned, so it ends up as an empty
     dict when there is no configuration or no property node.
     """
     action_node_properties: Dict[str, str] = {}
     config = self.oozie_node.find("configuration")
     # Compare with None explicitly: the truth value of an ElementTree Element
     # depends on whether it has children, and testing it is deprecated.
     if config is not None:
         property_nodes = xml_utils.find_nodes_by_tag(config, "property")
         if property_nodes:
             for node in property_nodes:
                 name = node.find("name").text
                 value = el_utils.replace_el_with_var(
                     node.find("value").text, props=self.props, quote=False)
                 action_node_properties[name] = value
     self.props.action_node_properties = action_node_properties
    def _get_or_default(root: ET.Element, tag: str, default: str = None, params: Dict[str, str] = None):
        """
        Looks up nodes with the given tag under ``root`` and returns the text of
        the first one after EL replacement (unquoted).

        When no node with that tag is found, ``default`` is returned instead.
        Any further matches after the first are ignored.
        """
        matches = xml_utils.find_nodes_by_tag(root, tag)
        if not matches:
            return default

        # Only the first match is considered
        first_match = matches[0]
        return el_utils.replace_el_with_var(first_match.text, params=params, quote=False)
Exemple #17
0
def extract_param_values_from_action_node(oozie_node: Element,
                                          props: PropertySet):
    """
    Builds a dict from the ``TAG_PARAM`` nodes found under ``oozie_node``.

    Nodes without text are ignored. Every other text is resolved with
    ``el_utils.replace_el_with_var`` (unquoted) and split on the first ``=``:
    the left part is the key, the right part the value (empty when there is no
    ``=``).

    :param oozie_node: action node to search for params
    :param props: property set handed to the EL replacement
    :return: mapping of param key to param value
    """
    extracted = {}
    for param_node in xml_utils.find_nodes_by_tag(oozie_node, TAG_PARAM):
        if param_node.text:
            resolved = el_utils.replace_el_with_var(param_node.text, props=props, quote=False)
            name, _, value = resolved.partition("=")
            extracted[name] = value
    return extracted
Exemple #18
0
def get_tag_el_text(root: ET.Element,
                    tag: str,
                    props: PropertySet,
                    default: Optional[str] = None) -> Optional[str]:
    """
    Returns the EL-resolved text of the node that ``find_node_by_tag`` locates
    for ``tag`` under ``root``.

    ``default`` is returned when no node is found or when the node has no text.
    The text is resolved with ``el_utils.replace_el_with_var`` without quoting.
    """
    node = find_node_by_tag(root, tag)
    if node is None or node.text is None:
        return default
    return el_utils.replace_el_with_var(node.text, props=props, quote=False)
Exemple #19
0
 def _parse_oozie_node(self):
     """
     Converts the sub-workflow referenced by the ``app-path`` tag.

     The application name is the part of the resolved ``app-path`` after its last
     ``/``. The application is then looked up under ``EXAMPLES_PATH`` and
     converted with a new ``OozieConverter`` in sub-workflow mode.
     """
     raw_app_path = self.oozie_node.find("app-path").text
     resolved_app_path = el_utils.replace_el_with_var(raw_app_path, props=self.props, quote=False)
     self.app_name = resolved_app_path.rpartition("/")[2]

     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     logging.info(f"Converting subworkflow from {app_path}")

     subworkflow_converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         renderer=self.renderer,
         action_mapper=self.action_mapper,
         dag_name=self.app_name,
         initial_props=self.get_child_props(),
     )
     subworkflow_converter.convert(as_subworkflow=True)
Exemple #20
0
def get_tags_el_array_from_text(root: ET.Element, tag: str,
                                props: PropertySet) -> List[str]:
    """
    Collects the EL-resolved text of every node with the given tag under ``root``.

    Nodes whose text is ``None`` are left out. The remaining texts are resolved
    with ``el_utils.replace_el_with_var`` without quoting. An empty list is
    returned when no node matches.
    """
    matching_nodes = find_nodes_by_tag(root=root, tag=tag)
    if not matching_nodes:
        return []
    return [
        el_utils.replace_el_with_var(node.text, props=props, quote=False)
        for node in matching_nodes
        if node.text is not None
    ]
    def on_parse_node(self):
        """
        Reads the action settings from ``self.oozie_node`` after running the
        parent implementation.

        In order: HDFS files and archives, jar / class / job name, the options
        from the first spark-opts node, and finally the application arguments.
        """
        super().on_parse_node()

        # Only the second element of each extractor result is kept.
        _, self.hdfs_files = self.file_extractor.parse_node()
        _, self.hdfs_archives = self.archive_extractor.parse_node()

        self.java_jar = get_tag_el_text(self.oozie_node, tag=SPARK_TAG_JAR, props=self.props)
        self.java_class = get_tag_el_text(self.oozie_node, tag=SPARK_TAG_CLASS, props=self.props)
        if self.java_class and self.java_jar:
            # When both a class and a jar are given, the jar is moved to dataproc_jars.
            self.dataproc_jars, self.java_jar = [self.java_jar], None
        self.job_name = get_tag_el_text(self.oozie_node, tag=SPARK_TAG_JOB_NAME, props=self.props)

        spark_opts_nodes = xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_OPTS)
        if spark_opts_nodes:
            first_spark_opts = spark_opts_nodes[0]
            self.spark_opts.update(self._parse_spark_opts(first_spark_opts))

        for argument_node in xml_utils.find_nodes_by_tag(self.oozie_node, SPARK_TAG_ARGS):
            resolved_argument = el_utils.replace_el_with_var(argument_node.text, self.props, quote=False)
            self.application_args.append(resolved_argument)
Exemple #22
0
 def _parse_oozie_node(self):
     """
     Converts the sub-workflow referenced by the ``app-path`` tag.

     The application name is the part of the resolved ``app-path`` after its last
     ``/``. After the configuration is parsed, the application is looked up under
     ``EXAMPLES_PATH`` and converted with a new ``OozieConverter`` using the
     ``subworkflow.tpl`` template.
     """
     raw_app_path = self.oozie_node.find("app-path").text
     resolved_app_path = el_utils.replace_el_with_var(raw_app_path, params=self.params, quote=False)
     self.app_name = resolved_app_path.rpartition("/")[2]

     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     logging.info(f"Converting subworkflow from {app_path}")

     self._parse_config()

     subworkflow_converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         start_days_ago=0,
         template_name="subworkflow.tpl",
         action_mapper=self.action_mapper,
         control_mapper=self.control_mapper,
         dag_name=f"{self.dag_name}.{self.task_id}",
         output_dag_name=f"subdag_{self.app_name}.py",
     )
     subworkflow_converter.convert()