def setUp(self):
     """Create the ``OozieConverter`` under test and store it on ``self.converter``."""
     converter_kwargs = dict(
         dag_name="test_dag",
         input_directory_path="/input_directory_path/",
         output_directory_path="/tmp",
         action_mapper=ACTION_MAP,
         control_mapper=CONTROL_MAP,
         user="******",
     )
     self.converter = OozieConverter(**converter_kwargs)
 def _parse_oozie_node(self):
     """Read the sub-workflow app path from the node and convert that app as a subworkflow.

     Sets ``self.app_name`` to the last ``/``-separated segment of the app path
     and runs a nested ``OozieConverter`` with ``as_subworkflow=True``.
     """
     declared_app_path = xml_utils.get_tag_el_text(self.oozie_node, TAG_APP)
     self.app_name = declared_app_path.rpartition("/")[-1]
     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     logging.info(f"Converting subworkflow from {app_path}")
     subworkflow_converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         renderer=self.renderer,
         action_mapper=self.action_mapper,
         dag_name=self.app_name,
         initial_props=self.get_child_props(),
         transformers=self.transformers,
     )
     subworkflow_converter.convert(as_subworkflow=True)
Esempio n. 3
0
 def _parse_oozie_node(self):
     """Resolve the ``app-path`` element and convert the referenced app as a subworkflow.

     The element text is passed through ``el_utils.replace_el_with_var`` with
     ``self.props``; the last ``/``-separated segment becomes ``self.app_name``.
     """
     app_path_element = self.oozie_node.find("app-path")
     resolved_app_path = el_utils.replace_el_with_var(app_path_element.text, props=self.props, quote=False)
     self.app_name = resolved_app_path.rpartition("/")[-1]
     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     logging.info(f"Converting subworkflow from {app_path}")
     subworkflow_converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         renderer=self.renderer,
         action_mapper=self.action_mapper,
         dag_name=self.app_name,
         initial_props=self.get_child_props(),
     )
     subworkflow_converter.convert(as_subworkflow=True)
Esempio n. 4
0
 def _create_converter():
     """Return an ``OozieConverter`` for ``test_dag`` whose renderer is a ``MagicMock``."""
     converter = OozieConverter(
         dag_name="test_dag",
         input_directory_path="/input_directory_path/",
         output_directory_path="/tmp",
         action_mapper=ACTION_MAP,
         user="******",
         renderer=mock.MagicMock(),
     )
     return converter
Esempio n. 5
0
def main():
    """Run the command-line conversion of an Oozie workflow directory.

    Steps, in order:

    1. Parse ``sys.argv`` with ``parse_args``.
    2. Default the DAG name to the last component of the input directory.
    3. Log a warning when the ``CONFIG`` file is absent from the input directory.
    4. When ``get_o2a_validate_workflows_script()`` returns a truthy value,
       run that script against the workflow XML.
    5. Create the output directory, build a ``DotRenderer`` (``args.dot`` set)
       or a ``PythonRenderer``, and run ``OozieConverter``.

    Raises:
        SystemExit: with status 1 when the validation script exits non-zero.
    """
    args = parse_args(sys.argv[1:])
    input_directory_path = args.input_directory_path
    output_directory_path = args.output_directory_path

    start_days_ago = args.start_days_ago
    schedule_interval = args.schedule_interval
    dag_name = args.dag_name

    if not dag_name:
        # basename("some/dir/") is "", so strip any trailing separator first;
        # otherwise a path typed with a trailing slash yields an empty DAG name.
        dag_name = os.path.basename(os.path.normpath(input_directory_path))

    conf_path = os.path.join(input_directory_path, CONFIG)
    if not os.path.isfile(conf_path):
        logging.warning(f"""

#################################### WARNING ###########################################

The '{CONFIG}' file was not detected in {input_directory_path}.
It may be necessary to provide input parameters for the workflow.

In case of any conversion errors make sure this configuration file is really not needed.
Otherwise please provide it.

########################################################################################
        """)
    validate_workflows_script = get_o2a_validate_workflows_script()
    if validate_workflows_script:
        try:
            check_call([
                validate_workflows_script,
                f"{input_directory_path}/{HDFS_FOLDER}/{WORKFLOW_XML}"
            ])
        except CalledProcessError:
            logging.error("Workflow failed schema validation. "
                          "Please correct the workflow XML and try again.")
            # sys.exit instead of the builtin exit(): the latter is injected by
            # the ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
            sys.exit(1)
    os.makedirs(output_directory_path, exist_ok=True)

    renderer_class = DotRenderer if args.dot else PythonRenderer
    renderer = renderer_class(
        output_directory_path=output_directory_path,
        schedule_interval=schedule_interval,
        start_days_ago=start_days_ago,
    )

    converter = OozieConverter(
        dag_name=dag_name,
        input_directory_path=input_directory_path,
        output_directory_path=output_directory_path,
        action_mapper=ACTION_MAP,
        user=args.user,
        renderer=renderer,
    )
    converter.recreate_output_directory()
    converter.convert()
Esempio n. 6
0
 def _parse_oozie_node(self):
     """Resolve the ``app-path`` element and convert the referenced app into a sub-DAG file.

     The element text is passed through ``el_utils.replace_el_with_var`` with
     ``self.params``; the last ``/``-separated segment becomes ``self.app_name``.
     ``self._parse_config()`` runs before the nested converter is built.
     """
     app_path_element = self.oozie_node.find("app-path")
     resolved_app_path = el_utils.replace_el_with_var(
         app_path_element.text, params=self.params, quote=False
     )
     self.app_name = resolved_app_path.rpartition("/")[-1]
     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, self.app_name)
     logging.info(f"Converting subworkflow from {app_path}")
     self._parse_config()
     subworkflow_converter = OozieConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         start_days_ago=0,
         template_name="subworkflow.tpl",
         action_mapper=self.action_mapper,
         control_mapper=self.control_mapper,
         dag_name=f"{self.dag_name}.{self.task_id}",
         output_dag_name=f"subdag_{self.app_name}.py",
     )
     subworkflow_converter.convert()
Esempio n. 7
0
def main():
    """Run the command-line conversion of an Oozie workflow directory.

    Steps, in order:

    1. Parse ``sys.argv`` with ``parse_args``.
    2. Default the DAG name to the last component of the input directory.
    3. Log a warning when the ``CONFIGURATION_PROPERTIES`` file is absent
       from the input directory.
    4. When ``<PROJECT_PATH>/bin/validate-workflows`` exists, run it against
       the workflow XML; otherwise log that validation is skipped.
    5. Create the output directory and run ``OozieConverter``.

    Raises:
        SystemExit: with status 1 when the validation script exits non-zero.
    """
    args = parse_args(sys.argv[1:])
    input_directory_path = args.input_directory_path
    output_directory_path = args.output_directory_path

    start_days_ago = args.start_days_ago
    schedule_interval = args.schedule_interval
    dag_name = args.dag_name

    if not dag_name:
        # basename("some/dir/") is "", so strip any trailing separator first;
        # otherwise a path typed with a trailing slash yields an empty DAG name.
        dag_name = os.path.basename(os.path.normpath(input_directory_path))

    conf_path = os.path.join(input_directory_path, CONFIGURATION_PROPERTIES)
    if not os.path.isfile(conf_path):
        logging.warning(f"""

#################################### WARNING ###########################################

The '{CONFIGURATION_PROPERTIES}' file was not detected in {input_directory_path}.
It may be necessary to provide input parameters for the workflow.

In case of any conversion errors make sure this configuration file is really not needed.
Otherwise please provide it.

########################################################################################
        """)
    # If the validate-workflows script is present in the project path, use it to validate the workflow
    validate_workflows_script = os.path.join(PROJECT_PATH, "bin",
                                             "validate-workflows")
    if os.path.isfile(validate_workflows_script):
        try:
            subprocess.check_call([
                validate_workflows_script,
                f"{input_directory_path}/{HDFS_FOLDER}/{WORKFLOW_XML}"
            ])
        # Qualified name: this block already depends on the ``subprocess`` module,
        # so the handler does not rely on a separate ``from subprocess import ...``.
        except subprocess.CalledProcessError:
            logging.error(
                "Workflow failed schema validation. Please correct the workflow XML and try again."
            )
            # sys.exit instead of the builtin exit(): the latter is injected by
            # the ``site`` module and is not guaranteed to exist (e.g. ``python -S``).
            sys.exit(1)
    else:
        logging.info(
            f"Skipping workflow validation as the {validate_workflows_script} is missing"
        )
    os.makedirs(output_directory_path, exist_ok=True)

    converter = OozieConverter(
        dag_name=dag_name,
        input_directory_path=input_directory_path,
        output_directory_path=output_directory_path,
        action_mapper=ACTION_MAP,
        control_mapper=CONTROL_MAP,
        user=args.user,
        start_days_ago=start_days_ago,
        schedule_interval=schedule_interval,
    )
    converter.recreate_output_directory()
    converter.convert()
class TestOozieConverter(TestCase):
    """Unit tests for ``OozieConverter`` and for ``o2a.parse_args``."""

    def setUp(self):
        """Create a fresh converter writing to ``/tmp`` before every test."""
        self.converter = OozieConverter(
            dag_name="test_dag",
            input_directory_path="/input_directory_path/",
            output_directory_path="/tmp",
            action_mapper=ACTION_MAP,
            control_mapper=CONTROL_MAP,
            user="******",
        )

    @staticmethod
    def _build_workflow():
        """Return the one-node ``Workflow`` fixture shared by the DAG-file tests."""
        return Workflow(
            dag_name="A",
            input_directory_path="in_dir",
            output_directory_path="out_dir",
            relations={Relation(from_task_id="AAA", to_task_id="BBB")},
            nodes=dict(AAA=ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))),
            dependencies={"import AAAA"},
        )

    def test_parse_args_input_output_file(self):
        """``-i`` and ``-o`` are exposed as the input and output directory paths."""
        input_dir = "/tmp/does.not.exist/"
        output_dir = "/tmp/out/"
        args = o2a.parse_args(["-i", input_dir, "-o", output_dir])
        self.assertEqual(args.input_directory_path, input_dir)
        self.assertEqual(args.output_directory_path, output_dir)

    def test_parse_args_user(self):
        """``-u`` is exposed as ``args.user``."""
        input_dir = "/tmp/does.not.exist"
        output_dir = "/tmp/out/"
        user = "******"
        args = o2a.parse_args(["-i", input_dir, "-o", output_dir, "-u", user])
        self.assertEqual(args.user, user)

    @mock.patch("o2a.converter.oozie_converter.render_template", return_value="AAA")
    @mock.patch("builtins.open", return_value=io.StringIO())
    def test_create_dag_file(self, open_mock, _):
        """``create_dag_file`` opens ``<output dir>/<dag name>.py`` for writing exactly once."""
        # Given
        workflow = self._build_workflow()
        # When
        self.converter.create_dag_file(workflow)
        # Then
        open_mock.assert_called_once_with("/tmp/test_dag.py", "w")

    @mock.patch("o2a.converter.oozie_converter.parser.OozieParser.parse_workflow")
    @mock.patch("o2a.converter.oozie_converter.black")
    @mock.patch("o2a.converter.oozie_converter.fix_file")
    @mock.patch("o2a.converter.oozie_converter.SortImports")
    def test_convert(self, sort_imports_mock, autoflake_fix_file_mock, black_mock, parse_workflow_mock):
        """``convert`` parses once and calls the patched formatters on the output file."""
        # Given
        workflow = self._build_workflow()
        parse_workflow_mock.return_value = workflow
        # When
        self.converter.convert()
        # Then
        parse_workflow_mock.assert_called_once_with()
        black_mock.format_file_in_place.assert_called_once_with(
            Path("/tmp/test_dag.py"), fast=mock.ANY, mode=mock.ANY, write_back=mock.ANY
        )
        autoflake_fix_file_mock.assert_called_once_with(
            "/tmp/test_dag.py",
            args=AutoflakeArgs(
                remove_all_unused_imports=True,
                ignore_init_module_imports=False,
                remove_duplicate_keys=False,
                remove_unused_variables=True,
                in_place=True,
                imports=None,
                expand_star_imports=False,
                check=False,
            ),
            standard_out=sys.stdout,
        )
        sort_imports_mock.assert_called_once_with("/tmp/test_dag.py")

    @mock.patch("o2a.converter.oozie_converter.render_template", return_value="TEXT_CONTENT")
    def test_write_dag_file(self, render_template_mock):
        """``render_workflow`` forwards the workflow fields to ``render_template``.

        The rendered text returned by the patched ``render_template`` must be
        returned unchanged. Despite the test name, no file is written here.
        """
        relations = {Relation(from_task_id="TASK_1", to_task_id="TASK_2")}
        nodes = dict(TASK_1=ParsedActionNode(DummyMapper(Element("dummy"), name="TASK_1")))
        dependencies = {"import awesome_stuff"}
        workflow = Workflow(
            input_directory_path="/tmp/input_directory",
            output_directory_path="/tmp/input_directory",
            dag_name="test_dag",
            relations=relations,
            nodes=nodes,
            dependencies=dependencies,
        )

        content = self.converter.render_workflow(workflow=workflow)

        render_template_mock.assert_called_once_with(
            dag_name="test_dag",
            dependencies={"import awesome_stuff"},
            nodes=[nodes["TASK_1"]],
            params={"user.name": "USER"},
            relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
            schedule_interval=None,
            start_days_ago=None,
            template_name="workflow.tpl",
        )

        self.assertEqual(content, "TEXT_CONTENT")

    def test_convert_nodes(self):
        """``convert_nodes`` stores each mapper's tasks and relations on its node."""
        tasks_1 = [
            Task(task_id="first_task", template_name="dummy.tpl"),
            Task(task_id="second_task", template_name="dummy.tpl"),
        ]
        relations_1 = {Relation(from_task_id="first_task", to_task_id="tasks_2")}
        tasks_2 = [Task(task_id="third_task", template_name="dummy.tpl")]
        # An empty *set*, matching the type of relations_1 ("{}" would be a dict).
        relations_2 = set()

        mapper_1 = mock.MagicMock(**{"to_tasks_and_relations.return_value": (tasks_1, relations_1)})
        mapper_2 = mock.MagicMock(**{"to_tasks_and_relations.return_value": (tasks_2, relations_2)})

        node_1 = ParsedActionNode(mapper=mapper_1)
        node_2 = ParsedActionNode(mapper=mapper_2)
        nodes = dict(TASK_1=node_1, TASK_2=node_2)

        self.converter.convert_nodes(nodes=nodes)
        # Identity checks: the very objects returned by the mappers must be stored.
        self.assertIs(node_1.tasks, tasks_1)
        self.assertIs(node_2.tasks, tasks_2)
        self.assertIs(node_1.relations, relations_1)
        self.assertIs(node_2.relations, relations_2)

    def test_copy_extra_assets(self):
        """``copy_extra_assets`` asks every node's mapper to copy from the ``hdfs`` subfolder."""
        mock_1 = mock.MagicMock()
        mock_2 = mock.MagicMock()

        self.converter.copy_extra_assets(dict(mock_1=mock_1, mock_2=mock_2))

        mock_1.mapper.copy_extra_assets.assert_called_once_with(
            input_directory_path="/input_directory_path/hdfs", output_directory_path="/tmp"
        )
        mock_2.mapper.copy_extra_assets.assert_called_once_with(
            input_directory_path="/input_directory_path/hdfs", output_directory_path="/tmp"
        )
Esempio n. 9
0
    def test_should_convert_demo_workflow(self):
        """Convert the ``demo`` example and inspect the workflow passed to the mocked renderer."""
        renderer_mock = mock.MagicMock()
        node_transformers = [
            RemoveInaccessibleNodeTransformer(),
            RemoveEndTransformer(),
            RemoveKillTransformer(),
            RemoveStartTransformer(),
            RemoveJoinTransformer(),
            RemoveForkTransformer(),
        ]
        demo_directory = path.join(EXAMPLES_PATH, "demo")

        demo_converter = OozieConverter(
            dag_name="demo",
            input_directory_path=demo_directory,
            output_directory_path="/tmp/",
            action_mapper=ACTION_MAP,
            renderer=renderer_mock,
            transformers=node_transformers,
            user="******",
        )
        demo_converter.recreate_output_directory()
        demo_converter.convert()

        # The workflow under inspection is the keyword argument of the last render call.
        _positional, call_kwargs = renderer_mock.create_workflow_file.call_args
        workflow: Workflow = call_kwargs["workflow"]

        self.assertEqual(demo_directory, workflow.input_directory_path)
        self.assertEqual("/tmp/", workflow.output_directory_path)
        self.assertEqual("demo", workflow.dag_name)

        expected_relations = {
            Relation(from_task_id="decision-node", to_task_id="end", is_error=False),
            Relation(from_task_id="decision-node", to_task_id="hdfs-node", is_error=False),
            Relation(from_task_id="join-node", to_task_id="decision-node", is_error=False),
            Relation(from_task_id="pig-node", to_task_id="join-node", is_error=False),
            Relation(from_task_id="shell-node", to_task_id="join-node", is_error=False),
            Relation(from_task_id="subworkflow-node", to_task_id="join-node", is_error=False),
        }
        self.assertEqual(expected_relations, workflow.task_group_relations)

        self.assertEqual({}, workflow.nodes)

        expected_task_group_names = {
            "pig-node",
            "subworkflow-node",
            "shell-node",
            "join-node",
            "decision-node",
            "hdfs-node",
            "end",
        }
        self.assertEqual(expected_task_group_names, workflow.task_groups.keys())

        expected_dependencies = {
            "from airflow import models",
            "from airflow.contrib.operators import dataproc_operator",
            "from airflow.operators import bash_operator",
            "from airflow.operators import dummy_operator",
            "from airflow.operators import python_operator",
            "from airflow.operators.subdag_operator import SubDagOperator",
            "from airflow.operators import bash_operator, dummy_operator",
            "from airflow.utils import dates",
            "from airflow.utils.trigger_rule import TriggerRule",
            "from o2a.o2a_libs.el_basic_functions import *",
            "from o2a.o2a_libs.el_basic_functions import first_not_null",
            "from o2a.o2a_libs.el_wf_functions import *",
            "from o2a.o2a_libs.property_utils import PropertySet",
            "import datetime",
            "import shlex",
            "import subdag_childwf",
        }
        self.assertEqual(expected_dependencies, workflow.dependencies)