def setUp(self):
    # Fresh converter per test, wired to the default action and control mappers.
    converter_kwargs = {
        "dag_name": "test_dag",
        "input_directory_path": "/input_directory_path/",
        "output_directory_path": "/tmp",
        "action_mapper": ACTION_MAP,
        "control_mapper": CONTROL_MAP,
        "user": "******",
    }
    self.converter = OozieConverter(**converter_kwargs)
def _parse_oozie_node(self):
    """Resolve the sub-workflow application and convert it with a nested converter.

    Reads the application path from the Oozie node, stores its last path
    component in ``self.app_name`` and runs an ``OozieConverter`` over the
    matching directory under ``EXAMPLES_PATH`` with ``as_subworkflow=True``.
    """
    app_path = xml_utils.get_tag_el_text(self.oozie_node, TAG_APP)
    # Strip trailing separators first: for ".../apps/childwf/" a bare
    # rpartition("/") would produce an empty application name.
    self.app_name = app_path.rstrip("/").rpartition("/")[2]
    # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
    # TODO: but for now we assume app is in "examples"
    app_path = os.path.join(EXAMPLES_PATH, self.app_name)
    logging.info(f"Converting subworkflow from {app_path}")
    converter = OozieConverter(
        input_directory_path=app_path,
        output_directory_path=self.output_directory_path,
        renderer=self.renderer,
        action_mapper=self.action_mapper,
        dag_name=self.app_name,
        initial_props=self.get_child_props(),
        transformers=self.transformers,
    )
    converter.convert(as_subworkflow=True)
def _parse_oozie_node(self):
    """Resolve the sub-workflow application and convert it with a nested converter.

    Reads the ``app-path`` element of the Oozie node, passes its text through
    ``el_utils.replace_el_with_var`` using ``self.props``, stores the last path
    component in ``self.app_name`` and runs an ``OozieConverter`` over the
    matching directory under ``EXAMPLES_PATH`` with ``as_subworkflow=True``.
    """
    app_path = self.oozie_node.find("app-path").text
    app_path = el_utils.replace_el_with_var(app_path, props=self.props, quote=False)
    # Strip trailing separators first: for ".../apps/childwf/" a bare
    # rpartition("/") would produce an empty application name.
    self.app_name = app_path.rstrip("/").rpartition("/")[2]
    # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
    # TODO: but for now we assume app is in "examples"
    app_path = os.path.join(EXAMPLES_PATH, self.app_name)
    logging.info(f"Converting subworkflow from {app_path}")
    converter = OozieConverter(
        input_directory_path=app_path,
        output_directory_path=self.output_directory_path,
        renderer=self.renderer,
        action_mapper=self.action_mapper,
        dag_name=self.app_name,
        initial_props=self.get_child_props(),
    )
    converter.convert(as_subworkflow=True)
def _create_converter():
    """Build an ``OozieConverter`` for tests, with a ``MagicMock`` standing in for the renderer."""
    converter = OozieConverter(
        dag_name="test_dag",
        input_directory_path="/input_directory_path/",
        output_directory_path="/tmp",
        action_mapper=ACTION_MAP,
        renderer=mock.MagicMock(),
        user="******",
    )
    return converter
def main():
    """Command-line entry point: convert an Oozie workflow directory.

    Steps, in order:
      1. Parse the command-line arguments.
      2. Warn when the ``CONFIG`` file is missing from the input directory.
      3. Run the workflow validation script, if one is available, and exit
         with status 1 when validation fails.
      4. Create the output directory, pick the renderer (``DotRenderer`` when
         ``args.dot`` is set, ``PythonRenderer`` otherwise) and run the
         conversion.
    """
    args = parse_args(sys.argv[1:])
    input_directory_path = args.input_directory_path
    output_directory_path = args.output_directory_path
    start_days_ago = args.start_days_ago
    schedule_interval = args.schedule_interval

    dag_name = args.dag_name
    if not dag_name:
        # basename() of a path ending in a separator is "", which would give
        # an empty DAG name; normpath drops the trailing separator first.
        dag_name = os.path.basename(os.path.normpath(input_directory_path))

    conf_path = os.path.join(input_directory_path, CONFIG)
    if not os.path.isfile(conf_path):
        logging.warning(
            f"""
#################################### WARNING ###########################################
The '{CONFIG}' file was not detected in {input_directory_path}.
It may be necessary to provide input parameters for the workflow.
In case of any conversion errors make sure this configuration file is really not needed.
Otherwise please provide it.
########################################################################################
"""
        )

    validate_workflows_script = get_o2a_validate_workflows_script()
    if validate_workflows_script:
        try:
            check_call([validate_workflows_script, f"{input_directory_path}/{HDFS_FOLDER}/{WORKFLOW_XML}"])
        except CalledProcessError:
            logging.error("Workflow failed schema validation. " "Please correct the workflow XML and try again.")
            # sys.exit is always available; the bare exit() helper is injected
            # by the site module and is intended for interactive sessions.
            sys.exit(1)

    os.makedirs(output_directory_path, exist_ok=True)

    renderer_class = DotRenderer if args.dot else PythonRenderer
    renderer = renderer_class(
        output_directory_path=output_directory_path,
        schedule_interval=schedule_interval,
        start_days_ago=start_days_ago,
    )

    converter = OozieConverter(
        dag_name=dag_name,
        input_directory_path=input_directory_path,
        output_directory_path=output_directory_path,
        action_mapper=ACTION_MAP,
        user=args.user,
        renderer=renderer,
    )
    converter.recreate_output_directory()
    converter.convert()
def _parse_oozie_node(self):
    """Resolve the sub-workflow application and convert it into a sub-DAG file.

    Reads the ``app-path`` element of the Oozie node, passes its text through
    ``el_utils.replace_el_with_var`` using ``self.params``, stores the last path
    component in ``self.app_name``, calls ``self._parse_config()`` and then
    runs an ``OozieConverter`` over the matching directory under
    ``EXAMPLES_PATH`` using the ``subworkflow.tpl`` template.
    """
    app_path = self.oozie_node.find("app-path").text
    app_path = el_utils.replace_el_with_var(app_path, params=self.params, quote=False)
    # Strip trailing separators first: for ".../apps/childwf/" a bare
    # rpartition("/") would produce an empty application name.
    self.app_name = app_path.rstrip("/").rpartition("/")[2]
    # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
    # TODO: but for now we assume app is in "examples"
    app_path = os.path.join(EXAMPLES_PATH, self.app_name)
    logging.info(f"Converting subworkflow from {app_path}")
    self._parse_config()
    converter = OozieConverter(
        input_directory_path=app_path,
        output_directory_path=self.output_directory_path,
        start_days_ago=0,
        template_name="subworkflow.tpl",
        action_mapper=self.action_mapper,
        control_mapper=self.control_mapper,
        dag_name=f"{self.dag_name}.{self.task_id}",
        output_dag_name=f"subdag_{self.app_name}.py",
    )
    converter.convert()
def main():
    """Command-line entry point: convert an Oozie workflow directory into a DAG.

    Steps, in order:
      1. Parse the command-line arguments.
      2. Warn when the ``CONFIGURATION_PROPERTIES`` file is missing from the
         input directory.
      3. Run ``bin/validate-workflows`` from ``PROJECT_PATH`` when that file
         exists, exiting with status 1 when validation fails; otherwise log
         that validation is skipped.
      4. Create the output directory and run the conversion.
    """
    args = parse_args(sys.argv[1:])
    input_directory_path = args.input_directory_path
    output_directory_path = args.output_directory_path
    start_days_ago = args.start_days_ago
    schedule_interval = args.schedule_interval

    dag_name = args.dag_name
    if not dag_name:
        # basename() of a path ending in a separator is "", which would give
        # an empty DAG name; normpath drops the trailing separator first.
        dag_name = os.path.basename(os.path.normpath(input_directory_path))

    conf_path = os.path.join(input_directory_path, CONFIGURATION_PROPERTIES)
    if not os.path.isfile(conf_path):
        logging.warning(
            f"""
#################################### WARNING ###########################################
The '{CONFIGURATION_PROPERTIES}' file was not detected in {input_directory_path}.
It may be necessary to provide input parameters for the workflow.
In case of any conversion errors make sure this configuration file is really not needed.
Otherwise please provide it.
########################################################################################
"""
        )

    # If the validate-workflows script is present in the project path - use it to validate the workflow
    validate_workflows_script = os.path.join(PROJECT_PATH, "bin", "validate-workflows")
    if os.path.isfile(validate_workflows_script):
        try:
            subprocess.check_call(
                [validate_workflows_script, f"{input_directory_path}/{HDFS_FOLDER}/{WORKFLOW_XML}"]
            )
        except CalledProcessError:
            logging.error("Workflow failed schema validation. Please correct the workflow XML and try again.")
            # sys.exit is always available; the bare exit() helper is injected
            # by the site module and is intended for interactive sessions.
            sys.exit(1)
    else:
        logging.info(f"Skipping workflow validation as the {validate_workflows_script} is missing")

    os.makedirs(output_directory_path, exist_ok=True)

    converter = OozieConverter(
        dag_name=dag_name,
        input_directory_path=input_directory_path,
        output_directory_path=output_directory_path,
        action_mapper=ACTION_MAP,
        control_mapper=CONTROL_MAP,
        user=args.user,
        start_days_ago=start_days_ago,
        schedule_interval=schedule_interval,
    )
    converter.recreate_output_directory()
    converter.convert()
class TestOozieConverter(TestCase):
    """Unit tests for ``OozieConverter`` and the ``o2a`` argument parser."""

    def setUp(self):
        # Fresh converter per test, wired to the default action and control mappers.
        converter_kwargs = {
            "dag_name": "test_dag",
            "input_directory_path": "/input_directory_path/",
            "output_directory_path": "/tmp",
            "action_mapper": ACTION_MAP,
            "control_mapper": CONTROL_MAP,
            "user": "******",
        }
        self.converter = OozieConverter(**converter_kwargs)

    def test_parse_args_input_output_file(self):
        # "-i" / "-o" must land in the matching namespace attributes.
        source_dir = "/tmp/does.not.exist/"
        target_dir = "/tmp/out/"

        parsed = o2a.parse_args(["-i", source_dir, "-o", target_dir])

        self.assertEqual(parsed.input_directory_path, source_dir)
        self.assertEqual(parsed.output_directory_path, target_dir)

    def test_parse_args_user(self):
        # "-u" must be exposed as ``args.user``.
        source_dir = "/tmp/does.not.exist"
        target_dir = "/tmp/out/"
        user_name = "******"

        parsed = o2a.parse_args(["-i", source_dir, "-o", target_dir, "-u", user_name])

        self.assertEqual(parsed.user, user_name)

    @mock.patch("o2a.converter.oozie_converter.render_template", return_value="AAA")
    @mock.patch("builtins.open", return_value=io.StringIO())
    def test_create_dag_file(self, open_mock, _):
        # Given
        parsed_node = ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))
        workflow = Workflow(
            dag_name="A",
            input_directory_path="in_dir",
            output_directory_path="out_dir",
            relations={Relation(from_task_id="AAA", to_task_id="BBB")},
            nodes={"AAA": parsed_node},
            dependencies={"import AAAA"},
        )

        # When
        self.converter.create_dag_file(workflow)

        # Then
        open_mock.assert_called_once_with("/tmp/test_dag.py", "w")

    @mock.patch("o2a.converter.oozie_converter.parser.OozieParser.parse_workflow")
    @mock.patch("o2a.converter.oozie_converter.black")
    @mock.patch("o2a.converter.oozie_converter.fix_file")
    @mock.patch("o2a.converter.oozie_converter.SortImports")
    def test_convert(self, sort_imports_mock, autoflake_fix_file_mock, black_mock, parse_workflow_mock):
        # Given
        parsed_node = ParsedActionNode(DummyMapper(Element("dummy"), name="AAA"))
        workflow = Workflow(
            dag_name="A",
            input_directory_path="in_dir",
            output_directory_path="out_dir",
            relations={Relation(from_task_id="AAA", to_task_id="BBB")},
            nodes={"AAA": parsed_node},
            dependencies={"import AAAA"},
        )
        parse_workflow_mock.return_value = workflow

        # When
        self.converter.convert()

        # Then
        expected_autoflake_args = AutoflakeArgs(
            remove_all_unused_imports=True,
            ignore_init_module_imports=False,
            remove_duplicate_keys=False,
            remove_unused_variables=True,
            in_place=True,
            imports=None,
            expand_star_imports=False,
            check=False,
        )
        parse_workflow_mock.assert_called_once_with()
        black_mock.format_file_in_place.assert_called_once_with(
            Path("/tmp/test_dag.py"), fast=mock.ANY, mode=mock.ANY, write_back=mock.ANY
        )
        autoflake_fix_file_mock.assert_called_once_with(
            "/tmp/test_dag.py", args=expected_autoflake_args, standard_out=sys.stdout
        )
        sort_imports_mock.assert_called_once_with("/tmp/test_dag.py")

    @mock.patch("o2a.converter.oozie_converter.render_template", return_value="TEXT_CONTENT")
    def test_write_dag_file(self, render_template_mock):
        # Given
        relations = {Relation(from_task_id="TASK_1", to_task_id="TASK_2")}
        nodes = {"TASK_1": ParsedActionNode(DummyMapper(Element("dummy"), name="TASK_1"))}
        dependencies = {"import awesome_stuff"}
        workflow = Workflow(
            input_directory_path="/tmp/input_directory",
            output_directory_path="/tmp/input_directory",
            dag_name="test_dag",
            relations=relations,
            nodes=nodes,
            dependencies=dependencies,
        )

        # When
        rendered = self.converter.render_workflow(workflow=workflow)

        # Then
        render_template_mock.assert_called_once_with(
            dag_name="test_dag",
            dependencies={"import awesome_stuff"},
            nodes=[nodes["TASK_1"]],
            params={"user.name": "USER"},
            relations={Relation(from_task_id="TASK_1", to_task_id="TASK_2")},
            schedule_interval=None,
            start_days_ago=None,
            template_name="workflow.tpl",
        )
        self.assertEqual(rendered, "TEXT_CONTENT")

    def test_convert_nodes(self):
        # Given: two mappers, each returning its own (tasks, relations) pair
        tasks_1 = [
            Task(task_id="first_task", template_name="dummy.tpl"),
            Task(task_id="second_task", template_name="dummy.tpl"),
        ]
        relations_1 = {Relation(from_task_id="first_task", to_task_id="tasks_2")}
        tasks_2 = [Task(task_id="third_task", template_name="dummy.tpl")]
        relations_2 = {}

        mapper_1 = mock.MagicMock()
        mapper_1.to_tasks_and_relations.return_value = (tasks_1, relations_1)
        mapper_2 = mock.MagicMock()
        mapper_2.to_tasks_and_relations.return_value = (tasks_2, relations_2)

        node_1 = ParsedActionNode(mapper=mapper_1)
        node_2 = ParsedActionNode(mapper=mapper_2)

        # When
        self.converter.convert_nodes(nodes={"TASK_1": node_1, "TASK_2": node_2})

        # Then: every node holds exactly the objects its mapper produced
        self.assertIs(node_1.tasks, tasks_1)
        self.assertIs(node_2.tasks, tasks_2)
        self.assertIs(node_1.relations, relations_1)
        self.assertIs(node_2.relations, relations_2)

    def test_copy_extra_assets(self):
        # Given
        mock_1 = mock.MagicMock()
        mock_2 = mock.MagicMock()

        # When
        self.converter.copy_extra_assets({"mock_1": mock_1, "mock_2": mock_2})

        # Then: each node's mapper is asked once to copy its assets
        for node in (mock_1, mock_2):
            node.mapper.copy_extra_assets.assert_called_once_with(
                input_directory_path="/input_directory_path/hdfs", output_directory_path="/tmp"
            )
def test_should_convert_demo_workflow(self):
    # Given: the "demo" example converted with the node-removing transformers
    # and a mocked renderer that captures the resulting workflow.
    renderer = mock.MagicMock()
    input_directory_path = path.join(EXAMPLES_PATH, "demo")
    converter = OozieConverter(
        dag_name="demo",
        input_directory_path=input_directory_path,
        output_directory_path="/tmp/",
        action_mapper=ACTION_MAP,
        renderer=renderer,
        transformers=[
            RemoveInaccessibleNodeTransformer(),
            RemoveEndTransformer(),
            RemoveKillTransformer(),
            RemoveStartTransformer(),
            RemoveJoinTransformer(),
            RemoveForkTransformer(),
        ],
        user="******",
    )

    # When
    converter.recreate_output_directory()
    converter.convert()

    # Then
    _, call_kwargs = renderer.create_workflow_file.call_args
    workflow: Workflow = call_kwargs["workflow"]

    expected_relations = {
        Relation(from_task_id="decision-node", to_task_id="end", is_error=False),
        Relation(from_task_id="decision-node", to_task_id="hdfs-node", is_error=False),
        Relation(from_task_id="join-node", to_task_id="decision-node", is_error=False),
        Relation(from_task_id="pig-node", to_task_id="join-node", is_error=False),
        Relation(from_task_id="shell-node", to_task_id="join-node", is_error=False),
        Relation(from_task_id="subworkflow-node", to_task_id="join-node", is_error=False),
    }
    expected_task_group_names = {
        "pig-node",
        "subworkflow-node",
        "shell-node",
        "join-node",
        "decision-node",
        "hdfs-node",
        "end",
    }
    expected_dependencies = {
        "from airflow import models",
        "from airflow.contrib.operators import dataproc_operator",
        "from airflow.operators import bash_operator",
        "from airflow.operators import dummy_operator",
        "from airflow.operators import python_operator",
        "from airflow.operators.subdag_operator import SubDagOperator",
        "from airflow.operators import bash_operator, dummy_operator",
        "from airflow.utils import dates",
        "from airflow.utils.trigger_rule import TriggerRule",
        "from o2a.o2a_libs.el_basic_functions import *",
        "from o2a.o2a_libs.el_basic_functions import first_not_null",
        "from o2a.o2a_libs.el_wf_functions import *",
        "from o2a.o2a_libs.property_utils import PropertySet",
        "import datetime",
        "import shlex",
        "import subdag_childwf",
    }

    self.assertEqual(input_directory_path, workflow.input_directory_path)
    self.assertEqual("/tmp/", workflow.output_directory_path)
    self.assertEqual("demo", workflow.dag_name)
    self.assertEqual(expected_relations, workflow.task_group_relations)
    self.assertEqual({}, workflow.nodes)
    self.assertEqual(expected_task_group_names, workflow.task_groups.keys())
    self.assertEqual(expected_dependencies, workflow.dependencies)