Example #1
0
 def _extract_java_data(self):
     """Extracts Java node data.

     Fills ``self.java_opts``, ``self.main_class`` and ``self.args`` from the
     merged job properties and from the action's XML node.

     JVM options are collected, in this order, from:

     * the ``mapred.child.java.opts`` and ``mapreduce.map.java.opts`` merged
       properties (when present),
     * the single ``TAG_JAVA_OPTS`` element, or - only when that element is
       missing or empty - the individual ``TAG_JAVA_OPT`` elements.

     Option strings are split on any run of whitespace, so doubled spaces,
     tabs or newlines never produce empty options.
     """
     root = self.oozie_node
     props = self.props

     for property_name in ("mapred.child.java.opts", "mapreduce.map.java.opts"):
         if property_name in props.merged:
             # split() with no argument drops empty chunks, unlike split(" ").
             self.java_opts.extend(props.merged[property_name].split())

     self.main_class = xml_utils.get_tag_el_text(root=root,
                                                 tag=TAG_MAIN_CLASS,
                                                 props=props)

     java_opts_string = xml_utils.get_tag_el_text(root=root,
                                                  tag=TAG_JAVA_OPTS,
                                                  props=props)
     if java_opts_string:
         self.java_opts.extend(java_opts_string.split())
     else:
         # Fall back to the one-option-per-element form.
         self.java_opts.extend(
             get_tags_el_array_from_text(root=root,
                                         tag=TAG_JAVA_OPT,
                                         props=props))

     self.args = get_tags_el_array_from_text(root=root,
                                             tag=TAG_ARG,
                                             props=props)
 def __extract_email_data(self):
     """Reads the e-mail fields of the action node into instance attributes.

     Each attribute is set to the text that ``xml_utils.get_tag_el_text``
     returns for the corresponding child tag of ``self.oozie_node``.
     """
     node = self.oozie_node
     # (attribute name on self, XML tag name) pairs, processed in this order.
     attribute_to_tag = (
         ("to_addr", "to"),
         ("cc_addr", "cc"),
         ("bcc_addr", "bcc"),
         ("subject", "subject"),
         ("body", "body"),
     )
     for attribute, tag in attribute_to_tag:
         setattr(self, attribute, xml_utils.get_tag_el_text(root=node, tag=tag))
    def _parse_oozie_node(self):
        """Populate the action attributes from ``self.oozie_node``.

        Reads the text of the ``TAG_RESOURCE``, ``TAG_NAME`` and ``TAG_SCRIPT``
        elements, the parameter values of the action node, and the
        file/archive lists produced by the two extractors.
        """
        node = self.oozie_node

        self.resource_manager = get_tag_el_text(node, TAG_RESOURCE)
        self.name_node = get_tag_el_text(node, TAG_NAME)
        self.script_file_name = get_tag_el_text(node, TAG_SCRIPT)
        self.params_dict = extract_param_values_from_action_node(node)

        # Both extractors return a pair: (plain entries, HDFS entries).
        files, hdfs_files = self.file_extractor.parse_node()
        self.files = files
        self.hdfs_files = hdfs_files

        archives, hdfs_archives = self.archive_extractor.parse_node()
        self.archives = archives
        self.hdfs_archives = hdfs_archives
Example #4
0
 def on_parse_node(self):
     """Read the git action settings from the node after the parent's parsing.

     ``self.destination_path`` is only assigned when a destination URI is
     present; ``self.key_path`` is always assigned and is ``None`` when no key
     path URI is present. Only the path component of each URI is kept.
     """
     super().on_parse_node()
     node, props = self.oozie_node, self.props

     self.git_uri = get_tag_el_text(node, TAG_GIT_URI, props=props)
     self.git_branch = get_tag_el_text(node, TAG_BRANCH, props=props)

     destination_uri = get_tag_el_text(node, tag=TAG_DESTINATION_URI, props=props)
     if destination_uri:
         self.destination_path = urlparse(destination_uri).path

     key_path_uri = get_tag_el_text(node, tag=TAG_KEY_PATH, props=props)
     if key_path_uri:
         self.key_path = urlparse(key_path_uri).path
     else:
         self.key_path = None
Example #5
0
    def _parse_oozie_node(self):
        """Build the shell command strings for this action from its XML node.

        The command text and its arguments are joined with single spaces,
        translated with ``el_parser.translate`` (unquoted) and stored as
        ``self.bash_command``; ``self.pig_command`` is that command prefixed
        with ``sh``.
        """
        node = self.oozie_node
        self.resource_manager = get_tag_el_text(node, TAG_RESOURCE)
        self.name_node = get_tag_el_text(node, TAG_NAME)

        command = get_tag_el_text(node, TAG_CMD)
        arguments = get_tags_el_array_from_text(node, TAG_ARG)
        full_command = " ".join([command, *arguments])

        self.bash_command = el_parser.translate(full_command, quote=False)
        self.pig_command = f"sh {self.bash_command}"
    def _parse_oozie_node(self):
        """Build the shell command strings for this action from its XML node.

        Every tag lookup is given ``self.props``. The command text and its
        arguments are joined with single spaces, converted with
        ``convert_el_to_jinja`` (unquoted) and stored as ``self.bash_command``;
        ``self.pig_command`` is that command prefixed with ``sh``.
        """
        node, props = self.oozie_node, self.props
        self.resource_manager = get_tag_el_text(node, TAG_RESOURCE, props)
        self.name_node = get_tag_el_text(node, TAG_NAME, props)

        command = get_tag_el_text(node, TAG_CMD, props)
        arguments = get_tags_el_array_from_text(node, TAG_ARG, props)
        full_command = " ".join([command, *arguments])

        self.bash_command = convert_el_to_jinja(full_command, quote=False)
        self.pig_command = f"sh {self.bash_command}"
Example #7
0
 def on_parse_node(self):
     """Assemble ``self.bash_command`` for the git action.

     Reads the git URI, branch, destination URI and key path URI from the
     node (each lookup is given ``self.params``), keeps only the path
     component of the two URIs and hands everything to
     ``prepare_git_command``.
     """
     def text_of(tag):
         # Single place for the (node, tag, params) lookup used four times.
         return get_tag_el_text(self.oozie_node, tag, self.params)

     git_uri = text_of(TAG_GIT_URI)
     git_branch = text_of(TAG_BRANCH)
     parsed_destination = urlparse(text_of(TAG_DESTINATION_URI))
     parsed_key = urlparse(text_of(TAG_KEY_PATH))

     self.bash_command = prepare_git_command(
         git_uri=git_uri,
         git_branch=git_branch,
         destination_path=parsed_destination.path,
         key_path=parsed_key.path,
     )
 def on_parse_node(self):
     """Read the git action settings from the node after the parent's parsing.

     ``self.destination_path`` is only assigned when a destination URI is
     present; ``self.key_path`` is always assigned and is ``None`` when no key
     path URI is present. Both URIs go through ``normalize_path`` with
     ``translated=True``.
     """
     super().on_parse_node()
     node = self.oozie_node

     self.git_uri = get_tag_el_text(node, TAG_GIT_URI)
     self.git_branch = get_tag_el_text(node, TAG_BRANCH)

     destination_uri = get_tag_el_text(node, tag=TAG_DESTINATION_URI)
     if destination_uri:
         self.destination_path = normalize_path(
             destination_uri, props=self.props, translated=True)

     key_path_uri = get_tag_el_text(node, tag=TAG_KEY_PATH)
     if not key_path_uri:
         self.key_path = None
     else:
         self.key_path = normalize_path(
             key_path_uri, props=self.props, translated=True)
Example #9
0
 def on_parse_node(self):
     """Read the name node, parameter values and HDFS files/archives.

     Runs the parent's parsing first. Only the second element of each
     extractor's result pair is stored; the first one is discarded.
     """
     super().on_parse_node()
     node = self.oozie_node

     self.name_node = get_tag_el_text(node, "name-node")
     self.params_dict = extract_param_values_from_action_node(node)

     _unused_files, hdfs_files = self.file_extractor.parse_node()
     self.hdfs_files = hdfs_files
     _unused_archives, hdfs_archives = self.archive_extractor.parse_node()
     self.hdfs_archives = hdfs_archives
    def on_parse_node(self):
        """Read the Spark action settings from the node.

        Runs the parent's parsing first, then stores the HDFS files/archives,
        jar, class, job name, Spark options and application arguments.

        When both a class and a jar are given, the jar is moved into
        ``self.dataproc_jars`` and ``self.java_jar`` is reset to ``None``.
        """
        super().on_parse_node()

        _unused_files, hdfs_files = self.file_extractor.parse_node()
        self.hdfs_files = hdfs_files
        _unused_archives, hdfs_archives = self.archive_extractor.parse_node()
        self.hdfs_archives = hdfs_archives

        node = self.oozie_node
        jar = get_tag_el_text(node, tag=SPARK_TAG_JAR)
        self.java_jar = jar
        main_class = get_tag_el_text(node, tag=SPARK_TAG_CLASS)
        self.java_class = main_class
        if main_class and jar:
            self.dataproc_jars = [jar]
            self.java_jar = None
        self.job_name = get_tag_el_text(node, tag=SPARK_TAG_JOB_NAME)

        # Only the first options element is parsed, if any exists.
        opts_nodes = xml_utils.find_nodes_by_tag(node, SPARK_TAG_OPTS)
        if opts_nodes:
            parsed_opts = self._parse_spark_opts(opts_nodes[0])
            self.spark_opts.update(parsed_opts)

        self.application_args = xml_utils.get_tags_el_array_from_text(node, tag=SPARK_TAG_ARG)
    def get_command(self) -> str:
        """Return the SSH command line built from the action node.

        The command text is followed by its arguments, each passed through
        ``shlex.quote``, joined with single spaces; the result is converted
        with ``el_utils.convert_el_to_jinja``.

        Raises:
            Exception: when the command element is missing or empty.
        """
        node, props = self.oozie_node, self.props
        command = xml_utils.get_tag_el_text(node, TAG_CMD, props)
        arguments = xml_utils.get_tags_el_array_from_text(node, TAG_ARG, props)
        if not command:
            raise Exception("Missing or empty command node in SSH action {}".format(self.oozie_node))

        quoted_arguments = map(shlex.quote, arguments)
        command_line = " ".join([command, *quoted_arguments])
        return el_utils.convert_el_to_jinja(command_line)
    def on_parse_node(self):
        """Read the query or script of the action plus its variables and files.

        Runs the parent's parsing and ``self._parse_config()`` first. Exactly
        one of the query and script elements must be set.

        Raises:
            ParseException: when neither or both of the two elements are set.
        """
        super().on_parse_node()
        self._parse_config()

        node = self.oozie_node
        self.query = get_tag_el_text(node, TAG_QUERY)
        self.script = get_tag_el_text(node, TAG_SCRIPT)

        if not (self.query or self.script):
            raise ParseException(
                f"Action Configuration does not include {TAG_SCRIPT} or {TAG_QUERY} element"
            )
        if self.query and self.script:
            raise ParseException(
                f"Action Configuration include {TAG_SCRIPT} and {TAG_QUERY} element. "
                f"Only one can be set at the same time.")

        self.variables = extract_param_values_from_action_node(node)

        # Only the second element of each extractor's result pair is kept.
        _unused_files, hdfs_files = self.file_extractor.parse_node()
        self.hdfs_files = hdfs_files
        _unused_archives, hdfs_archives = self.archive_extractor.parse_node()
        self.hdfs_archives = hdfs_archives
    def get_command(self) -> str:
        """Return the SSH command line built from the action node.

        The command text is followed by its arguments, each passed through
        ``shlex.quote``, joined with single spaces; the result is translated
        with ``el_parser.translate(..., quote=True)``.

        Raises:
            Exception: when the command element is missing or empty.
        """
        node = self.oozie_node
        command = xml_utils.get_tag_el_text(node, TAG_CMD)
        arguments = xml_utils.get_tags_el_array_from_text(node, TAG_ARG)
        if not command:
            raise Exception(f"Missing or empty command node in SSH action {self.oozie_node}")

        quoted_arguments = map(shlex.quote, arguments)
        command_line = " ".join([command, *quoted_arguments])
        return el_parser.translate(command_line, quote=True)
Example #14
0
    def on_parse_node(self):
        """Read the Spark action settings from the node.

        Runs the parent's parsing first, then stores the HDFS files/archives,
        jar, class, job name and Spark options, and appends one application
        argument per ``SPARK_TAG_ARGS`` node.

        When both a class and a jar are given, the jar is moved into
        ``self.dataproc_jars`` and ``self.java_jar`` is reset to ``None``.
        """
        super().on_parse_node()

        _unused_files, hdfs_files = self.file_extractor.parse_node()
        self.hdfs_files = hdfs_files
        _unused_archives, hdfs_archives = self.archive_extractor.parse_node()
        self.hdfs_archives = hdfs_archives

        node = self.oozie_node
        jar = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_JAR)
        self.java_jar = jar
        main_class = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_CLASS)
        self.java_class = main_class
        if main_class and jar:
            self.dataproc_jars = [jar]
            self.java_jar = None
        self.job_name = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_JOB_NAME)

        # Only the first options element is parsed, if any exists.
        opts_nodes = xml_utils.find_nodes_by_tag(node, SPARK_TAG_OPTS)
        if opts_nodes:
            parsed_opts = self._parse_spark_opts(opts_nodes[0])
            self.spark_opts.update(parsed_opts)

        # Each argument's text is passed through replace_el_with_var, unquoted.
        for arg_node in xml_utils.find_nodes_by_tag(node, SPARK_TAG_ARGS):
            converted = el_utils.replace_el_with_var(arg_node.text, self.props, quote=False)
            self.application_args.append(converted)
 def _parse_oozie_node(self):
     """Convert the sub-workflow referenced by this node.

     The application name is the last path segment of the ``TAG_APP`` element
     text. A nested ``OozieConverter`` is then run, as a sub-workflow, on the
     directory of that name under ``EXAMPLES_PATH``, reusing this object's
     output directory, renderer, action mapper and transformers.
     """
     app_path = xml_utils.get_tag_el_text(self.oozie_node, TAG_APP)
     # Strip trailing slashes first: "a/b/".rpartition("/") would otherwise
     # yield an empty name, i.e. an empty dag name and a converter pointed at
     # the whole examples directory.
     _, _, self.app_name = app_path.rstrip("/").rpartition("/")
     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     # Lazy %-formatting: the message is only built if INFO is enabled.
     logging.info("Converting subworkflow from %s", app_path)
     converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         renderer=self.renderer,
         action_mapper=self.action_mapper,
         dag_name=self.app_name,
         initial_props=self.get_child_props(),
         transformers=self.transformers,
     )
     converter.convert(as_subworkflow=True)