def test_build_workflow_for_job_pig(self, job_binary):
    """Pig workflow XML carries data-source params, swift creds, script.

    The second half re-generates the workflow with proxy users enabled
    and expects trust/domain based credentials instead of plain ones.
    NOTE(review): the triple-quoted expected fragments below were
    flattened onto single lines at some point; the original assertions
    presumably spanned multiple indented lines -- confirm against the
    actual workflow_factory output before relying on these asserts.
    """
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)
    # INPUT/OUTPUT params point at the suffixed swift URLs.
    self.assertIn(""" <param>INPUT=swift://ex.sahara/i</param> <param>OUTPUT=swift://ex.sahara/o</param>""", res)
    # Credentials are lifted from the swift data sources.
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration>""", res)
    self.assertIn("<script>script.pig</script>", res)
    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)
    # Proxy mode swaps in domain name, trust id and proxy-user creds.
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.domain.name</name> <value>sahara_proxy_domain</value> </property> <property> <name>fs.swift.service.sahara.password</name> <value>55555555-6666-7777-8888-999999999999</value> </property> <property> <name>fs.swift.service.sahara.trust.id</name> <value>0123456789abcdef0123456789abcdef</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>job_00000000-1111-2222-3333-4444444444444444</value> </property> </configuration>""", res)
def test_build_workflow_swift_configs(self, job_binary):
    """Swift credentials are injected from whichever endpoint is swift."""
    # Test that swift configs come from either input or output data sources
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    # NOTE(review): passes the whole job_exec and omits data_source_urls;
    # other tests in this file pass job_exec.job_configs plus a URL map --
    # confirm which get_workflow_xml signature is current.
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration>""", res)
    # Same credentials expected when only the OUTPUT side is swift.
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('swift://ex/o')
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration>""", res)
    # With no swift endpoints only the user-supplied config shows up.
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    self.assertIn(""" <configuration> <property> <name>dummy</name> <value>value</value> </property> </configuration>""", res)
def test_build_workflow_for_job_hive(self, job_binary):
    """Hive workflow XML: hive-site job-xml, masked creds, script, params."""
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}
    src_in = u.create_data_source('swift://ex/i')
    src_out = u.create_data_source('swift://ex/o')
    url_map = {src_in.id: src_in.url, src_out.id: src_out.url}
    workflow = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        src_in, src_out, 'hadoop', url_map)
    hive_node = xml.parseString(workflow).getElementsByTagName('hive')[0]
    self.assertEqual('/user/hadoop/conf/hive-site.xml',
                     xmlutils.get_text_from_node(hive_node, 'job-xml'))
    conf_nodes = hive_node.getElementsByTagName('configuration')
    self.assertEqual(
        {'fs.swift.service.sahara.password': '******',
         'fs.swift.service.sahara.username': '******'},
        xmlutils.get_property_dict(conf_nodes[0]))
    self.assertEqual('script.q',
                     xmlutils.get_text_from_node(hive_node, 'script'))
    self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                      'OUTPUT': 'swift://ex.sahara/o'},
                     xmlutils.get_param_dict(hive_node))
    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)
    workflow = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        src_in, src_out, 'hadoop', url_map)
    hive_node = xml.parseString(workflow).getElementsByTagName('hive')[0]
    conf_nodes = hive_node.getElementsByTagName('configuration')
    self.assertEqual(
        {'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
         'fs.swift.service.sahara.trust.id':
             '0123456789abcdef0123456789abcdef',
         'fs.swift.service.sahara.password': '******',
         'fs.swift.service.sahara.username': '******'},
        xmlutils.get_property_dict(conf_nodes[0]))
def test_build_workflow_for_job_hive(self, job_binary):
    """Hive workflow XML exposes job-xml, masked creds, script and params."""
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}
    ds_in = u.create_data_source("swift://ex/i")
    ds_out = u.create_data_source("swift://ex/o")
    url_map = {ds_in.id: ds_in.url, ds_out.id: ds_out.url}

    def _hive_node():
        # Re-render the workflow with the current job/job_exec bindings
        # and return the parsed <hive> element.
        rendered = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            ds_in, ds_out, "hadoop", url_map)
        return xml.parseString(rendered).getElementsByTagName("hive")[0]

    hive = _hive_node()
    self.assertEqual("/user/hadoop/conf/hive-site.xml",
                     xmlutils.get_text_from_node(hive, "job-xml"))
    props = xmlutils.get_property_dict(
        hive.getElementsByTagName("configuration")[0])
    self.assertEqual({"fs.swift.service.sahara.password": "******",
                      "fs.swift.service.sahara.username": "******"},
                     props)
    self.assertEqual("script.q",
                     xmlutils.get_text_from_node(hive, "script"))
    self.assertEqual({"INPUT": "swift://ex.sahara/i",
                      "OUTPUT": "swift://ex.sahara/o"},
                     xmlutils.get_param_dict(hive))
    # testing workflow creation with a proxy domain
    self.override_config("use_domain_for_proxy_users", True)
    self.override_config("proxy_user_domain_name", "sahara_proxy_domain")
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)
    hive = _hive_node()
    props = xmlutils.get_property_dict(
        hive.getElementsByTagName("configuration")[0])
    self.assertEqual(
        {"fs.swift.service.sahara.domain.name": "sahara_proxy_domain",
         "fs.swift.service.sahara.trust.id":
             "0123456789abcdef0123456789abcdef",
         "fs.swift.service.sahara.password": "******",
         "fs.swift.service.sahara.username": "******"},
        props)
def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class, remote):
    """Uploaded job files land under the given HDFS prefix."""
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class
    conductor_raw_data.return_value = 'ok'
    engine = FakeOozieJobEngine(u.create_cluster())
    # A Pig job uploads its script; a MapReduce job uploads its lib jar.
    expectations = ((edp.JOB_TYPE_PIG, ['job_prefix/script.pig']),
                    (edp.JOB_TYPE_MAPREDUCE, ['job_prefix/lib/main.jar']))
    for job_type, expected_paths in expectations:
        job, _ = u.create_job_exec(job_type)
        uploaded = engine._upload_job_files_to_hdfs(
            mock.Mock(), 'job_prefix', job, {})
        self.assertEqual(expected_paths, uploaded)
def test_prepare_run_job(self, job, data_source, update, remote, wf_factory, get_ds_urls, prepare_cluster):
    """_prepare_run_job returns context, hdfs user, execution and params."""
    wf_factory.return_value = mock.MagicMock()
    fake_remote = mock.MagicMock()
    fake_remote.__exit__.return_value = 'closed'
    remote.return_value = fake_remote
    fake_job = mock.MagicMock()
    fake_job.name = "myJob"
    job.return_value = fake_job
    fake_source = mock.MagicMock()
    fake_source.url = "localhost"
    get_ds_urls.return_value = ('url', 'url')
    data_source.return_value = fake_source
    engine = FakeOozieJobEngine(u.create_cluster())
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    update.return_value = job_exec
    prepared = engine._prepare_run_job(job_exec)
    self.assertEqual(ctx.ctx(), prepared['context'])
    self.assertEqual('hadoop', prepared['hdfs_user'])
    self.assertEqual(job_exec, prepared['job_execution'])
    self.assertEqual({}, prepared['oozie_params'])
def test_job_type_supported(self, job_get):
    """A PIG job resolves to an engine; an unknown type resolves to None."""
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_get.return_value = job
    engine = job_manager._get_job_engine(u.create_cluster(), job_exec)
    self.assertIsNotNone(engine)
    job.type = "unsupported_type"
    engine = job_manager._get_job_engine(u.create_cluster(), job_exec)
    self.assertIsNone(engine)
def test_build_workflow_for_job_java(self):
    """Java workflow: swift creds come from configs, args are adapted."""
    # If args include swift paths, user and password values
    # will have to be supplied via configs instead of being
    # lifted from input or output data sources
    configs = {sw.HADOOP_SWIFT_USERNAME: '******',
               sw.HADOOP_SWIFT_PASSWORD: '******'}
    configs = {
        'configs': configs,
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    # NOTE(review): passes job_exec rather than job_exec.job_configs --
    # confirm against the current get_workflow_xml signature.
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec)
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
def test_build_workflow_for_job_hive(self, job_binary):
    """Hive workflow includes hive-site job-xml, swift creds and params."""
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE)
    job_binary.return_value = {"name": "script.q"}
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    # NOTE(review): older call shape -- passes job_exec with no
    # data_source_urls map; confirm against the current signature.
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    self.assertIn(""" <job-xml>/user/hadoop/conf/hive-site.xml</job-xml> <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration> <script>script.q</script> <param>INPUT=swift://ex.sahara/i</param> <param>OUTPUT=swift://ex.sahara/o</param>""", res)
def test_run_job(self, exec_get, job, data_source, update, remote, wf_factory, get_ds_urls, prepare_cluster):
    """run_job submits through the Oozie client and reports PENDING."""
    wf_factory.return_value = mock.MagicMock()
    fake_remote = mock.MagicMock()
    fake_remote.__exit__.return_value = 'closed'
    remote.return_value = fake_remote
    job.return_value = mock.MagicMock()
    job.name = "myJob"
    fake_source = mock.MagicMock()
    fake_source.url = "localhost"
    data_source.return_value = fake_source
    get_ds_urls.return_value = ('url', 'url')
    engine = FakeOozieJobEngine(u.create_cluster())
    oozie_client = mock.MagicMock()
    oozie_client.add_job = mock.MagicMock(return_value=1)
    oozie_client.get_job_info = mock.MagicMock(
        return_value={'status': 'PENDING'})
    engine.get_client = mock.MagicMock(return_value=oozie_client)
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    update.return_value = job_exec
    self.assertEqual((1, 'PENDING', None), engine.run_job(job_exec))
def _build_workflow_with_conf_common(self, job_type):
    """Shared check: user config plus mapred input/output dirs in XML."""
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    job, job_exec = u.create_job_exec(job_type,
                                      configs={"configs": {'c': 'f'}})
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    # The user-supplied config key/value must be propagated verbatim.
    self.assertIn(""" <property> <name>c</name> <value>f</value> </property>""", res)
    # Data-source URLs become the mapred input/output directories.
    self.assertIn(""" <property> <name>mapred.input.dir</name> <value>swift://ex.sahara/i</value> </property>""", res)
    self.assertIn(""" <property> <name>mapred.output.dir</name> <value>swift://ex.sahara/o</value> </property>""", res)
def test_run_job_handles_exceptions(self, runjob, job_ex_upd):
    """A SwiftClientException during run marks the execution FAILED."""
    runjob.side_effect = ex.SwiftClientException("Unauthorised")
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_manager.run_job(job_exec.id)
    self.assertEqual(1, job_ex_upd.call_count)
    recorded_status = job_ex_upd.call_args[0][2]["info"]["status"]
    self.assertEqual(edp.JOB_STATUS_FAILED, recorded_status)
def test_hdfs_create_workflow_dir(self, remote):
    """The workflow dir is created under the hdfs user's home tree."""
    fake_remote = mock.MagicMock()
    fake_remote.__exit__.return_value = 'closed'
    remote.return_value = fake_remote
    engine = FakeOozieJobEngine(u.create_cluster())
    job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
    workflow_dir = engine._create_hdfs_workflow_dir(mock.Mock(), job)
    self.assertIn('/user/hadoop/special_name/', workflow_dir)
def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
    """With the adapter enabled, the wrapper main class replaces the user's."""
    edp_conf_mock.return_value = True
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_JAVA,
        {"configs": {"edp.java.main_class": "some_main"}})
    workflow = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)
    self.assertIn(
        "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>",
        workflow)
    self.assertNotIn("some_main", workflow)
def test_run_job_unsupported_type(self, cluster_get, job_exec_get, job_get):
    """_run_job raises EDPError for a job type with no matching engine."""
    job, job_exec = u.create_job_exec("unsupported_type")
    job_exec_get.return_value = job_exec
    job_get.return_value = job
    active_cluster = u.create_cluster()
    active_cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = active_cluster
    with testtools.ExpectedException(ex.EDPError):
        job_manager._run_job(job_exec.id)
def test_run_job_unsupported_type(self, cluster_get, job_exec_get, job_get):
    """An unsupported job type must make _run_job raise EDPError."""
    job, job_exec = u.create_job_exec("unsupported_type")
    job_get.return_value = job
    job_exec_get.return_value = job_exec
    cluster = u.create_cluster()
    cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = cluster
    with testtools.ExpectedException(ex.EDPError):
        job_manager._run_job(job_exec.id)
def test_build_workflow_for_job_shell(self):
    """Shell workflow gets configs, env vars from params, and arguments."""
    shell_configs = {"configs": {"k1": "v1"},
                     "params": {"p1": "v1"},
                     "args": ["a1", "a2"]}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, shell_configs)
    workflow = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)
    for fragment in ("<name>k1</name>",
                     "<value>v1</value>",
                     "<env-var>p1=v1</env-var>",
                     "<argument>a1</argument>",
                     "<argument>a2</argument>"):
        self.assertIn(fragment, workflow)
def test_suspend_unsuspendible_job(self, suspend_job_get, cluster_get, job_exec_get, job_get):
    """A SUCCEEDED execution is never forwarded to the engine's suspend."""
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, None, False,
        {'status': edp.JOB_STATUS_SUCCEEDED})
    job_exec_get.return_value = job_exec
    job_get.return_value = job
    active_cluster = u.create_cluster()
    active_cluster.status = "Active"
    cluster_get.return_value = active_cluster
    self.assertEqual(0, suspend_job_get.call_count)
def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
    """Adapter mode wraps the user's main class with the EDP MainWrapper."""
    edp_conf_mock.return_value = True
    adapter_configs = {"configs": {"edp.java.main_class": "some_main"}}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, adapter_configs)
    rendered = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)
    wrapper_tag = "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>"
    self.assertIn(wrapper_tag, rendered)
    self.assertNotIn("some_main", rendered)
def test_get_job_status(self):
    """Status is None without an engine job id, else fetched from Oozie."""
    engine = FakeOozieJobEngine(u.create_cluster())
    oozie_client = mock.MagicMock()
    oozie_client.add_job = mock.MagicMock(return_value=1)
    oozie_client.get_job_info = mock.MagicMock(
        return_value={'status': 'PENDING'})
    engine.get_client = mock.MagicMock(return_value=oozie_client)
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    self.assertIsNone(engine.get_job_status(job_exec))
    job_exec.engine_job_id = 1
    self.assertEqual({'status': 'PENDING'},
                     engine.get_job_status(job_exec))
def test_scheduled_edp_job_run(self, job_exec_get, cluster_get, job_get, run_scheduled_job):
    """A 'scheduled' execution type is routed to the scheduled-run path."""
    exec_info = {"job_execution_info": {"job_execution_type": "scheduled",
                                        "start": "2015-5-15T01:00Z"}}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, exec_info)
    job_exec_get.return_value = job_exec
    job_get.return_value = job
    active_cluster = u.create_cluster()
    active_cluster.status = "Active"
    cluster_get.return_value = active_cluster
    job_manager._run_job(job_exec.id)
    self.assertEqual(1, run_scheduled_job.call_count)
def test_get_job_status(self):
    """Without an Oozie id no status is returned; with one it is fetched."""
    fake_client = mock.MagicMock()
    fake_client.add_job = mock.MagicMock(return_value=1)
    fake_client.get_job_info = mock.MagicMock(
        return_value={'status': 'PENDING'})
    engine = FakeOozieJobEngine(u.create_cluster())
    engine.get_client = mock.MagicMock(return_value=fake_client)
    _, execution = u.create_job_exec(edp.JOB_TYPE_PIG)
    self.assertIsNone(engine.get_job_status(execution))
    execution.engine_job_id = 1
    self.assertEqual({'status': 'PENDING'},
                     engine.get_job_status(execution))
def test_run_job_handles_exceptions_with_run_job(self, canceljob, runjob, job_ex_get, job_ex_upd):
    """An OozieException after submission fails the job and cancels it."""
    runjob.side_effect = ex.OozieException("run_job failed")
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.engine_job_id = "fake_oozie_id"
    job_ex_get.return_value = job_exec
    job_manager.run_job(job_exec.id)
    self.assertEqual(1, job_ex_get.call_count)
    self.assertEqual(1, job_ex_upd.call_count)
    recorded_status = job_ex_upd.call_args[0][2]["info"]["status"]
    self.assertEqual(edp.JOB_STATUS_FAILED, recorded_status)
    # Because an engine job id exists, the partial run is cancelled.
    self.assertEqual(1, canceljob.call_count)
def test_cancel_job(self, kill_get, info_get):
    """Kill is only issued when the execution has an engine job id."""
    info_get.return_value = {}
    engine = FakeOozieJobEngine(u.create_cluster())
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    # No engine id -> nothing to kill.
    job_exec.engine_job_id = None
    engine.cancel_job(job_exec)
    self.assertEqual(0, kill_get.call_count)
    # With an engine id the kill call goes through.
    job_exec.engine_job_id = 123
    engine.cancel_job(job_exec)
    self.assertEqual(1, kill_get.call_count)
def test_suspend_unsuspendible_job(self, suspend_job_get, cluster_get, job_exec_get, job_get):
    """Suspending a SUCCEEDED execution must not reach the engine."""
    terminal_info = {'status': edp.JOB_STATUS_SUCCEEDED}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False,
                                      terminal_info)
    job_get.return_value = job
    job_exec_get.return_value = job_exec
    cluster = u.create_cluster()
    cluster.status = "Active"
    cluster_get.return_value = cluster
    self.assertEqual(0, suspend_job_get.call_count)
def test_get_data_sources(self, ds):
    """Input/output sources are fetched by their stored ids."""
    ds.side_effect = lambda ctx, id: mock.Mock(id=id, url="obj_" + id)
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.input_id = "s1"
    job_exec.output_id = "s2"
    src_in, src_out = job_utils.get_data_sources(job_exec, job, {})
    self.assertEqual("obj_s1", src_in.url)
    self.assertEqual("obj_s2", src_out.url)
def test_get_data_sources_with_null_id(self):
    """No data sources are resolved when both ids are None."""
    swift_creds = {sw.HADOOP_SWIFT_USERNAME: "******",
                   sw.HADOOP_SWIFT_PASSWORD: "******"}
    job_configs = {"configs": swift_creds,
                   "args": ["swift://ex/i", "output_path"]}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
    job_exec.input_id = None
    job_exec.output_id = None
    src_in, src_out = job_utils.get_data_sources(job_exec, job, {})
    self.assertIsNone(src_in)
    self.assertIsNone(src_out)
def test_cancel_job(self, kill_get, info_get):
    """cancel_job skips the kill when no engine job id is present."""
    info_get.return_value = {}
    oozie_engine = FakeOozieJobEngine(u.create_cluster())
    _, execution = u.create_job_exec(edp.JOB_TYPE_PIG)
    execution.engine_job_id = None
    oozie_engine.cancel_job(execution)
    self.assertEqual(0, kill_get.call_count)
    execution.engine_job_id = 123
    oozie_engine.cancel_job(execution)
    self.assertEqual(1, kill_get.call_count)
def test_get_data_sources_java(self):
    """Java jobs carry swift paths in args, so no data sources resolve.

    Fix: use assertIsNone instead of assertEqual(None, ...) -- testing
    for None is an identity check, and assertIsNone is the documented
    unittest idiom for it.
    """
    swift_creds = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}
    configs = {'configs': swift_creds,
               'args': ['swift://ex/i', 'output_path']}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    input_source, output_source = job_utils.get_data_sources(
        job_exec, job)
    self.assertIsNone(input_source)
    self.assertIsNone(output_source)
def test_get_input_output_data_sources(self, ds):
    """hdfs input/output sources are looked up by id and returned."""
    ds.side_effect = lambda ctx, id: mock.Mock(
        id=id, url="hdfs://obj_" + id, type='hdfs')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.input_id = 's1'
    job_exec.output_id = 's2'
    src_in, src_out = job_utils.get_input_output_data_sources(
        job_exec, job, {})
    self.assertEqual('hdfs://obj_s1', src_in.url)
    self.assertEqual('hdfs://obj_s2', src_out.url)
def test_get_data_sources(self, ds):
    """Data sources are resolved through the conductor by their ids."""

    def _fake_lookup(ctx, id):
        return mock.Mock(id=id, url="obj_" + id)

    ds.side_effect = _fake_lookup
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.input_id = 's1'
    job_exec.output_id = 's2'
    resolved_in, resolved_out = job_utils.get_data_sources(
        job_exec, job, {})
    self.assertEqual('obj_s1', resolved_in.url)
    self.assertEqual('obj_s2', resolved_out.url)
def test_run_job_handles_exceptions_with_run_job(self, canceljob, runjob, job_ex_get, job_ex_upd):
    """A submission-time OozieException fails the job and triggers cancel."""
    runjob.side_effect = ex.OozieException("run_job failed")
    job, execution = u.create_job_exec(edp.JOB_TYPE_PIG)
    execution.engine_job_id = "fake_oozie_id"
    job_ex_get.return_value = execution
    job_manager.run_job(execution.id)
    self.assertEqual(1, job_ex_get.call_count)
    self.assertEqual(1, job_ex_upd.call_count)
    self.assertEqual(edp.JOB_STATUS_FAILED,
                     job_ex_upd.call_args[0][2]["info"]["status"])
    self.assertEqual(1, canceljob.call_count)
def test_build_workflow_for_job_shell(self):
    """Shell workflow renders config entries, env vars and arguments."""
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_SHELL,
        {"configs": {"k1": "v1"},
         "params": {"p1": "v1"},
         "args": ["a1", "a2"]})
    rendered = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)
    self.assertIn("<name>k1</name>", rendered)
    self.assertIn("<value>v1</value>", rendered)
    self.assertIn("<env-var>p1=v1</env-var>", rendered)
    self.assertIn("<argument>a1</argument>", rendered)
    self.assertIn("<argument>a2</argument>", rendered)
def test_get_data_sources(self, ds):
    """The conductor lookup result is returned for each stored id."""
    ds.side_effect = lambda ctx, id: "obj_" + id
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.input_id = 's1'
    job_exec.output_id = 's2'
    src_in, src_out = job_utils.get_data_sources(job_exec, job)
    self.assertEqual('obj_s1', src_in)
    self.assertEqual('obj_s2', src_out)
def test_run_job_handles_exceptions(self, canceljob, runjob, job_ex_get, job_ex_upd):
    """A failure before submission fails the job without cancelling it."""
    runjob.side_effect = ex.SwiftClientException("Unauthorised")
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_exec.engine_job_id = None
    job_ex_get.return_value = job_exec
    job_manager.run_job(job_exec.id)
    self.assertEqual(1, job_ex_get.call_count)
    self.assertEqual(1, job_ex_upd.call_count)
    self.assertEqual(edp.JOB_STATUS_FAILED,
                     job_ex_upd.call_args[0][2]["info"]["status"])
    # No engine job id was ever assigned, so nothing to cancel.
    self.assertEqual(0, canceljob.call_count)
def _build_workflow_common(self, job_type, streaming=False):
    """Shared MapReduce workflow check, optionally with streaming conf."""
    if streaming:
        configs = {'edp.streaming.mapper': '/usr/bin/cat',
                   'edp.streaming.reducer': '/usr/bin/wc'}
        configs = {'configs': configs}
    else:
        configs = {}
    job, job_exec = u.create_job_exec(job_type, configs)
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')
    if streaming:
        # Streaming jobs must declare mapper/reducer commands.
        self.assertIn(""" <streaming> <mapper>/usr/bin/cat</mapper> <reducer>/usr/bin/wc</reducer> </streaming>""", res)
    # Data-source URLs become mapred dirs; swift creds are injected.
    self.assertIn(""" <property> <name>mapred.output.dir</name> <value>swift://ex.sahara/o</value> </property>""", res)
    self.assertIn(""" <property> <name>mapred.input.dir</name> <value>swift://ex.sahara/i</value> </property>""", res)
    self.assertIn(""" <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property>""", res)
    self.assertIn(""" <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property>""", res)
def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get, job_get, job_execution_update_get):
    """Cancelling a job that stays RUNNING past the timeout raises."""
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, None, False,
        {"status": edp.JOB_STATUS_RUNNING})
    job_exec_get.return_value = job_exec
    job_get.return_value = job
    active_cluster = u.create_cluster()
    active_cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = active_cluster
    time_get.return_value = 10000
    job_execution_update_get.return_value = job_exec
    with testtools.ExpectedException(ex.CancelingFailed):
        job_manager.cancel_job(job_exec.id)
def test_get_data_sources_java(self):
    """Java jobs pass swift paths via args, so no sources are resolved.

    Fix: assertEqual(None, ...) replaced with assertIsNone -- None is a
    singleton, so an identity assertion is the documented unittest idiom.
    """
    swift_creds = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}
    configs = {
        'configs': swift_creds,
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    input_source, output_source = job_utils.get_data_sources(
        job_exec, job)
    self.assertIsNone(input_source)
    self.assertIsNone(output_source)
def test_get_input_output_data_sources_with_null_id(self):
    """Null input/output ids yield no resolved data sources."""
    swift_creds = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}
    job_configs = {'configs': swift_creds,
                   'args': ['hdfs://ex/i', 'output_path']}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
    job_exec.input_id = None
    job_exec.output_id = None
    src_in, src_out = job_utils.get_input_output_data_sources(
        job_exec, job, {})
    self.assertIsNone(src_in)
    self.assertIsNone(src_out)
def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get, job_get, job_execution_update_get):
    """CancelingFailed is raised when the execution never leaves RUNNING."""
    running_info = {'status': edp.JOB_STATUS_RUNNING}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False,
                                      running_info)
    job_get.return_value = job
    job_exec_get.return_value = job_exec
    job_execution_update_get.return_value = job_exec
    cluster = u.create_cluster()
    cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = cluster
    time_get.return_value = 10000
    with testtools.ExpectedException(ex.CancelingFailed):
        job_manager.cancel_job(job_exec.id)
def test_scheduled_edp_job_run(self, job_exec_get, cluster_get, job_get, run_scheduled_job):
    """Scheduled executions are dispatched to the scheduled-run helper."""
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG,
        {'job_execution_info': {'job_execution_type': 'scheduled',
                                'start': '2015-5-15T01:00Z'}})
    job_get.return_value = job
    job_exec_get.return_value = job_exec
    cluster = u.create_cluster()
    cluster.status = "Active"
    cluster_get.return_value = cluster
    job_manager._run_job(job_exec.id)
    self.assertEqual(1, run_scheduled_job.call_count)
def test_get_data_sources_with_null_id(self):
    """Executions with no input/output ids resolve to no data sources."""
    creds = {sw.HADOOP_SWIFT_USERNAME: '******',
             sw.HADOOP_SWIFT_PASSWORD: '******'}
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_JAVA,
        {'configs': creds, 'args': ['swift://ex/i', 'output_path']})
    job_exec.input_id = None
    job_exec.output_id = None
    resolved_in, resolved_out = job_utils.get_data_sources(
        job_exec, job, {})
    self.assertIsNone(resolved_in)
    self.assertIsNone(resolved_out)
def test_build_workflow_for_job_java(self):
    """Java workflow: creds from configs; proxy-domain variant checked too.

    NOTE(review): the triple-quoted expected fragments were flattened to
    single lines at some point -- confirm against real factory output.
    """
    # If args include swift paths, user and password values
    # will have to be supplied via configs instead of being
    # lifted from input or output data sources
    configs = {sw.HADOOP_SWIFT_USERNAME: '******',
               sw.HADOOP_SWIFT_PASSWORD: '******'}
    configs = {
        'configs': configs,
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    # NOTE(review): passes job_exec, not job_exec.job_configs -- confirm
    # against the current get_workflow_xml signature.
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec)
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    configs = {
        'configs': {},
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                      proxy=True)
    res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                            job_exec)
    # Proxy mode injects domain, trust id and proxy-user credentials.
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.domain.name</name> <value>sahara_proxy_domain</value> </property> <property> <name>fs.swift.service.sahara.password</name> <value>55555555-6666-7777-8888-999999999999</value> </property> <property> <name>fs.swift.service.sahara.trust.id</name> <value>0123456789abcdef0123456789abcdef</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>job_00000000-1111-2222-3333-4444444444444444</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
def test_build_workflow_for_job_hive(self, job_binary):
    """Hive workflow: hive-site job-xml, masked swift creds and params."""
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}
    in_src = u.create_data_source('swift://ex/i')
    out_src = u.create_data_source('swift://ex/o')
    ds_urls = {in_src.id: in_src.url, out_src.id: out_src.url}
    xml_text = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs, in_src, out_src,
        'hadoop', ds_urls)
    hive_el = xml.parseString(xml_text).getElementsByTagName('hive')[0]
    self.assertEqual('/user/hadoop/conf/hive-site.xml',
                     xmlutils.get_text_from_node(hive_el, 'job-xml'))
    conf_el = hive_el.getElementsByTagName('configuration')[0]
    self.assertEqual({'fs.swift.service.sahara.password': '******',
                      'fs.swift.service.sahara.username': '******'},
                     xmlutils.get_property_dict(conf_el))
    self.assertEqual('script.q',
                     xmlutils.get_text_from_node(hive_el, 'script'))
    self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                      'OUTPUT': 'swift://ex.sahara/o'},
                     xmlutils.get_param_dict(hive_el))
    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)
    xml_text = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs, in_src, out_src,
        'hadoop', ds_urls)
    hive_el = xml.parseString(xml_text).getElementsByTagName('hive')[0]
    conf_el = hive_el.getElementsByTagName('configuration')[0]
    self.assertEqual(
        {'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
         'fs.swift.service.sahara.trust.id':
             '0123456789abcdef0123456789abcdef',
         'fs.swift.service.sahara.password': '******',
         'fs.swift.service.sahara.username': '******'},
        xmlutils.get_property_dict(conf_el))
def test_build_workflow_for_job_java(self):
    """Java workflow: creds from job configs; proxy-domain variant too.

    NOTE(review): the triple-quoted expected fragments were flattened to
    single lines at some point -- confirm against real factory output.
    """
    # If args include swift paths, user and password values
    # will have to be supplied via configs instead of being
    # lifted from input or output data sources
    configs = {sw.HADOOP_SWIFT_USERNAME: '******',
               sw.HADOOP_SWIFT_PASSWORD: '******'}
    configs = {
        'configs': configs,
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    configs = {
        'configs': {},
        'args': ['swift://ex/i', 'output_path']
    }
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                      proxy=True)
    res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                            job_exec.job_configs)
    # Proxy mode injects domain, trust id and proxy-user credentials.
    self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.domain.name</name> <value>sahara_proxy_domain</value> </property> <property> <name>fs.swift.service.sahara.password</name> <value>55555555-6666-7777-8888-999999999999</value> </property> <property> <name>fs.swift.service.sahara.trust.id</name> <value>0123456789abcdef0123456789abcdef</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>job_00000000-1111-2222-3333-4444444444444444</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
def test_build_workflow_swift_configs(self, job_binary):
    """Swift credential properties must appear whenever either the
    input or the output data source is a swift URL, and job configs
    must be passed through when neither is swift.
    """
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}

    swift_creds_xml = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>"""

    # Case 1: swift input, hdfs output -> creds come from the input.
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    urls = {input_data.id: input_data.url,
            output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', urls)
    self.assertIn(swift_creds_xml, res)

    # Case 2: hdfs input, swift output -> creds come from the output.
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('swift://ex/o')
    urls = {input_data.id: input_data.url,
            output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', urls)
    self.assertIn(swift_creds_xml, res)

    # Case 3: no swift anywhere -> only the user-supplied config
    # shows up in the workflow configuration.
    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    urls = {input_data.id: input_data.url,
            output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', urls)
    self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)
def test_build_workflow_for_job_pig(self, job_binary):
    """Check the Oozie workflow XML generated for a Pig job.

    Verifies the INPUT/OUTPUT params, the swift credential
    properties, and the script element; then repeats the build with
    a proxy domain and checks the trust credentials are used.
    """
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    self.assertIn("<script>script.pig</script>", res)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>""", res)
def _build_workflow_common(self, job_type, streaming=False, proxy=False):
    """Shared workflow-XML checks for MapReduce-style job types.

    Builds a workflow for *job_type* against swift input/output
    fixtures and asserts the streaming block (when requested), the
    mapred input/output dirs, and the swift credential properties —
    either the plain fixture credentials or, with ``proxy=True``,
    the proxy-domain trust credentials.
    """
    if streaming:
        configs = {'configs': {'edp.streaming.mapper': '/usr/bin/cat',
                               'edp.streaming.reducer': '/usr/bin/wc'}}
    else:
        configs = {}

    job, job_exec = u.create_job_exec(job_type, configs)

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    if streaming:
        self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

    self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

    if not proxy:
        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
    else:
        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name",
                             'sahara_proxy_domain')
        job, job_exec = u.create_job_exec(job_type, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)