def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    input_directory_path: str,
    output_directory_path: str,
    props: PropertySet,
    action_mapper: Dict[str, Type[ActionMapper]],
    renderer: BaseRenderer,
    transformers: List[BaseWorkflowTransformer] = None,
    **kwargs,
):
    """Initialise the mapper, store its settings and parse the Oozie node.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name; forwarded to the base initialiser and also
        stored as ``task_id``.
    :param dag_name: Forwarded to the base initialiser and stored.
    :param input_directory_path: Forwarded to the base initialiser and stored.
    :param output_directory_path: Stored on the instance.
    :param props: Forwarded to the base initialiser.
    :param action_mapper: Mapping of names to ``ActionMapper`` subclasses,
        stored as given.
    :param renderer: Stored as given.
    :param transformers: Optional list; any falsy value is replaced by a new
        empty list.
    :param kwargs: Extra keyword arguments passed on to the base initialiser.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        dag_name=dag_name,
        props=props,
        input_directory_path=input_directory_path,
        **kwargs,
    )

    # Naming.
    self.task_id = name
    self.dag_name = dag_name

    # Directory paths.
    self.input_directory_path = input_directory_path
    self.output_directory_path = output_directory_path

    # Collaborators handed in by the caller.
    self.renderer = renderer
    self.action_mapper = action_mapper
    self.transformers = transformers if transformers else []

    # Parsing stays the final step, after every attribute has been assigned.
    self._parse_oozie_node()
Exemple #2
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     input_directory_path: str,
     output_directory_path: str,
     action_mapper: Dict[str, Type[ActionMapper]],
     control_mapper: Dict[str, Type[BaseMapper]],
     trigger_rule=TriggerRule.ALL_SUCCESS,
     params=None,
     **kwargs,
 ):
     """Initialise the mapper, store its settings and parse the Oozie node.

     :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
     :param name: Mapper name; forwarded to the base initialiser and also
         stored as ``task_id``.
     :param dag_name: Stored on the instance.
     :param input_directory_path: Stored on the instance.
     :param output_directory_path: Stored on the instance.
     :param action_mapper: Mapping of names to ``ActionMapper`` subclasses.
     :param control_mapper: Mapping of names to ``BaseMapper`` subclasses.
     :param trigger_rule: Forwarded to the base initialiser and stored.
     :param params: Optional parameters; ``None`` becomes a new empty dict.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self, oozie_node=oozie_node, name=name, trigger_rule=trigger_rule, **kwargs
     )

     # Only ``None`` triggers the default; an empty dict passed by the caller
     # is kept as the very same object.
     self.params = {} if params is None else params
     self.properties: Dict[str, str] = {}

     self.task_id = name
     self.dag_name = dag_name
     self.trigger_rule = trigger_rule

     self.input_directory_path = input_directory_path
     self.output_directory_path = output_directory_path

     self.action_mapper = action_mapper
     self.control_mapper = control_mapper

     # Parsing stays the final step, after every attribute has been assigned.
     self._parse_oozie_node()
Exemple #3
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params: Dict[str, str] = None,
     **kwargs,
 ):
     """Initialise the mapper with empty job fields and its extractors.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param trigger_rule: Passed to the base initialiser and stored.
     :param params: Optional parameters; any falsy value is replaced by a
         new empty dict.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)

     self.trigger_rule = trigger_rule
     self.params = params if params else {}

     # Both extractors receive the same params object as the mapper.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, params=self.params)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=self.params)

     # Scalar fields start out empty.
     self.java_class = self.java_jar = ""
     self.job_name = self.prepare_command = None

     # Each container gets its own fresh object.
     self.properties = {}
     self.jars = []
     self.application_args = []
     self.hdfs_files = []
     self.hdfs_archives = []
     self.dataproc_jars = []
Exemple #4
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     props: PropertySet,
     jar_files: List[str],
     **kwargs,
 ):
     """Initialise the mapper and resolve its jar files.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param dag_name: Passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param jar_files: Jar file list; any falsy value is replaced by a new
         empty list.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self, oozie_node=oozie_node, dag_name=dag_name, name=name, props=props, **kwargs
     )

     # ``self.props`` is read here without being assigned in this method;
     # presumably the base initialiser sets it.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

     # Fields that start out unset or empty.
     self.main_class: Optional[str] = None
     self.args: Optional[List[str]] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None
     self.java_opts: List[str] = []

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

     self.jar_files: List[str] = jar_files or []
     self.jar_files_in_hdfs: List[str] = []
     # Runs last, once both jar lists exist.
     self._get_jar_files_in_hdfs_full_paths()
Exemple #5
0
 def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
     """Create the mapper with its git and key-path fields unset.

     :param oozie_node: XML element passed to ``ActionMapper.__init__``.
     :param name: Mapper name passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs,
     )

     # This initialiser does not parse the node itself; every field is None.
     self.git_uri: Optional[str] = None
     self.git_branch: Optional[str] = None
     self.key_path_uri: Optional[str] = None
     self.key_path: Optional[str] = None
     self.destination_path: Optional[str] = None

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
 def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
     """Initialise the mapper, parse the node, then attach the prepare extension.

     :param oozie_node: XML element passed to ``ActionMapper.__init__``.
     :param name: Mapper name passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)

     # Order matters here: the node is parsed before the extension is built.
     self._parse_oozie_node()
     extension = PrepareMapperExtension(self)
     self.prepare_extension: PrepareMapperExtension = extension
 def __init__(self, oozie_node: Element, name: str, dag_name: str, props: PropertySet, **kwargs):
     """Initialise the mapper with empty fields and its extractors.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param dag_name: Passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         dag_name=dag_name,
         props=props,
         **kwargs,
     )

     # ``self.props`` is read here without being assigned in this method;
     # presumably the base initialiser sets it.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

     # Fields that start out unset or empty.
     self.params_dict: Dict[str, str] = {}
     self.name_node: Optional[str] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
Exemple #8
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params: Dict[str, str] = None,
     **kwargs,
 ):
     """Initialise the mapper and remember its params and trigger rule.

     :param oozie_node: XML element passed to ``ActionMapper.__init__``.
     :param name: Mapper name passed to the base initialiser.
     :param trigger_rule: Passed to the base initialiser and stored.
     :param params: Optional parameters; ``None`` becomes a new empty dict.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)
     self.trigger_rule = trigger_rule
     # Only ``None`` is replaced; a caller-supplied dict is stored as is.
     self.params = {} if params is None else params
Exemple #9
0
 def __init__(self, oozie_node: Element, name: str, dag_name: str, props: PropertySet, **kwargs):
     """Create the mapper with every e-mail field unset.

     :param oozie_node: XML element passed to ``ActionMapper.__init__``.
     :param name: Mapper name passed to the base initialiser.
     :param dag_name: Passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         dag_name=dag_name,
         name=name,
         props=props,
         **kwargs,
     )

     # Message content.
     self.subject: Optional[str] = None
     self.body: Optional[str] = None

     # Recipients. The ``_addr`` suffix keeps ``to``/``cc`` above Pylint's
     # three-letter minimum for variable names; ``bcc_addr`` follows suit
     # for consistency.
     self.to_addr: Optional[str] = None
     self.cc_addr: Optional[str] = None
     self.bcc_addr: Optional[str] = None
    def __init__(
        self, oozie_node: Element, name: str, props: PropertySet, template: str = "ssh.tpl", **kwargs
    ):
        """Initialise the SSH mapper and resolve its command, user and host.

        :param oozie_node: XML element passed to ``ActionMapper.__init__``.
        :param name: Mapper name passed to the base initialiser.
        :param props: Passed to the base initialiser.
        :param template: Template file name, stored as ``self.template``.
        :param kwargs: Extra keyword arguments passed on to the base initialiser.
        :raises IndexError: If the host key returned by ``get_host_key()``
            contains no ``@`` separator.
        """
        ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)
        self.template = template

        self.command = self.get_command()
        host_key = self.get_host_key()

        # Since Airflow separates user and host, we can't use jinja templating.
        # We must check if it is in job_properties.
        user_host = host_key.split("@")
        if len(user_host) < 2:
            # Without the separator the indexing below used to fail with a bare
            # "list index out of range". The exception type is kept so existing
            # handlers still work, but the message now names the bad value.
            raise IndexError(
                "Host key {!r} of SSH action {!r} is not of the form user@host".format(host_key, name)
            )
        self.user = user_host[0]
        self.host = user_host[1]
 def __init__(self, oozie_node: ET.Element, name: str, props: PropertySet, **kwargs):
     """Initialise the mapper with empty job fields and its extractors.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs,
     )

     # ``self.props`` is read here without being assigned in this method;
     # presumably the base initialiser sets it.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

     # Scalar fields start out unset.
     self.java_class: Optional[str] = None
     self.java_jar: Optional[str] = None
     self.job_name: Optional[str] = None

     # Containers start out empty.
     self.jars: List[str] = []
     self.application_args: List[str] = []
     self.hdfs_files: List[str] = []
     self.hdfs_archives: List[str] = []
     self.dataproc_jars: List[str] = []
     self.spark_opts: Dict[str, str] = {}

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
 def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
     """Initialise the mapper, parse the node, then attach the prepare extension.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)

     # ``self.props`` is read here without being assigned in this method;
     # presumably the base initialiser sets it.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)
     self.params_dict: Dict[str, str] = {}

     # The node is parsed once the fields above exist and before the
     # extension is created.
     self._parse_oozie_node()
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
Exemple #13
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params=None,
     **kwargs,
 ):
     """Initialise the mapper, build its extractors and parse the Oozie node.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param trigger_rule: Passed to the base initialiser and stored.
     :param params: Optional parameters; ``None`` becomes a new empty dict.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         trigger_rule=trigger_rule,
         **kwargs,
     )

     # Only ``None`` is replaced; the same object is then shared by the mapper
     # and both extractors.
     effective_params = {} if params is None else params
     self.params = effective_params
     self.trigger_rule = trigger_rule

     self.properties = {}
     self.params_dict = {}

     self.file_extractor = FileExtractor(oozie_node=oozie_node, params=effective_params)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=effective_params)

     # Parsing stays the final step, after every attribute has been assigned.
     self._parse_oozie_node()
 def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
     """Initialise the mapper with unset query fields and its extractors.

     :param oozie_node: XML element passed to the base initialiser and to
         both extractors.
     :param name: Mapper name passed to the base initialiser.
     :param props: Passed to the base initialiser.
     :param kwargs: Extra keyword arguments passed on to the base initialiser.
     """
     ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)

     # Every field starts out as None in this initialiser.
     self.query: Optional[str] = None
     self.script: Optional[str] = None
     self.variables: Optional[Dict[str, str]] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     # ``self.props`` is read here without being assigned in this method;
     # presumably the base initialiser sets it.
     self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
     self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
Exemple #15
0
    def __init__(
        self,
        oozie_node: Element,
        name: str,
        trigger_rule: str = TriggerRule.ALL_SUCCESS,
        params: Dict[str, str] = None,
        template: str = "ssh.tpl",
        **kwargs,
    ):
        """Initialise the SSH mapper from the ``command``, ``args`` and ``host`` nodes.

        The command and its arguments are shell-quoted, joined with spaces and
        converted with ``el_utils.convert_el_to_jinja``. The host value is
        passed through ``el_utils.strip_el``, optionally looked up in
        ``params``, and split into ``self.user`` and ``self.host``.

        :param oozie_node: XML element passed to ``ActionMapper.__init__``.
        :param name: Mapper name passed to the base initialiser.
        :param trigger_rule: Passed to the base initialiser.
        :param params: Optional mapping used to resolve the host key;
            ``None`` is treated as an empty dict.
        :param template: Template file name, stored as ``self.template``.
        :param kwargs: Extra keyword arguments passed on to the base initialiser.
        :raises Exception: If the ``command`` node is missing or empty, or the
            ``host`` node is missing.
        :raises IndexError: If the resolved host key contains no ``@``.
        """
        ActionMapper.__init__(self,
                              oozie_node=oozie_node,
                              name=name,
                              trigger_rule=trigger_rule,
                              **kwargs)

        if params is None:
            params = {}
        self.template = template

        cmd_node = self.oozie_node.find("command")
        arg_nodes = self.oozie_node.findall("args")
        if cmd_node is None or not cmd_node.text:
            raise Exception(
                "Missing or empty command node in SSH action {}".format(
                    self.oozie_node))
        # An <args> node without text contributes an empty (quoted) argument.
        args = (x.text or "" for x in arg_nodes)
        cmd = " ".join(shlex.quote(x) for x in [cmd_node.text, *args])

        self.command = el_utils.convert_el_to_jinja(cmd, quote=True)
        host = self.oozie_node.find("host")
        if host is None:
            raise Exception("Missing host node in SSH action: {}".format(
                self.oozie_node))
        host_key = el_utils.strip_el(host.text)
        # The <host> node is formatted like [USER]@[HOST]; its stripped value
        # may instead be a key whose real value lives in params.
        if host_key in params:
            host_key = params[host_key]

        # Since Airflow separates user and host, we can't use jinja templating.
        # We must check if it is in params.
        user_host = host_key.split("@")
        if len(user_host) < 2:
            # Without the separator the indexing below used to fail with a bare
            # "list index out of range". The exception type is kept so existing
            # handlers still work, but the message now names the bad value.
            raise IndexError(
                "Host {!r} in SSH action {} is not of the form user@host".format(
                    host_key, self.oozie_node))
        self.user = user_host[0]
        self.host = user_host[1]
Exemple #16
0
    def test_prepend_task_empty_relations(self):
        """Prepending to one task with no relations links the two tasks."""
        prepended = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")
        existing = Task(task_id=TEST_MAPPER_NAME + "_2", template_name="pig.tpl")

        tasks, relations = ActionMapper.prepend_task(
            task_to_prepend=prepended, tasks=[existing], relations=[]
        )

        # The prepended task comes first in the returned task list ...
        self.assertEqual([prepended, existing], tasks)
        # ... and exactly one relation runs from it to the existing task.
        expected_relations = [Relation(from_task_id="mapper_name_1", to_task_id="mapper_name_2")]
        self.assertEqual(expected_relations, relations)
Exemple #17
0
 def test_prepend_task_no_tasks(self):
     """Prepending to an empty task list raises ``IndexError``."""
     lone_task = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")
     call_arguments = dict(task_to_prepend=lone_task, tasks=[], relations=[])
     with self.assertRaises(IndexError):
         ActionMapper.prepend_task(**call_arguments)