def test_build_workflow_for_job_hive(self, job_binary):
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    doc = xml.parseString(res)
    hive = doc.getElementsByTagName('hive')[0]
    self.assertEqual('/user/hadoop/conf/hive-site.xml',
                     xmlutils.get_text_from_node(hive, 'job-xml'))

    configuration = hive.getElementsByTagName('configuration')
    properties = xmlutils.get_property_dict(configuration[0])
    self.assertEqual({'fs.swift.service.sahara.password': '******',
                      'fs.swift.service.sahara.username': '******'},
                     properties)

    self.assertEqual('script.q',
                     xmlutils.get_text_from_node(hive, 'script'))

    params = xmlutils.get_param_dict(hive)
    self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                      'OUTPUT': 'swift://ex.sahara/o'}, params)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    doc = xml.parseString(res)
    hive = doc.getElementsByTagName('hive')[0]
    configuration = hive.getElementsByTagName('configuration')
    properties = xmlutils.get_property_dict(configuration[0])
    self.assertEqual({
        'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
        'fs.swift.service.sahara.trust.id':
            '0123456789abcdef0123456789abcdef',
        'fs.swift.service.sahara.password': '******',
        'fs.swift.service.sahara.username': '******'}, properties)

def test_job_type_supported(self, job_get):
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_get.return_value = job
    self.assertIsNotNone(job_manager._get_job_engine(u.create_cluster(),
                                                     job_exec))

    job.type = "unsupported_type"
    self.assertIsNone(job_manager._get_job_engine(u.create_cluster(),
                                                  job_exec))

def test_build_workflow_for_job_pig(self, job_binary):
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    self.assertIn("<script>script.pig</script>", res)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>""", res)

def test_build_workflow_swift_configs(self, job_binary):
    # Test that swift configs come from either input or output data
    # sources
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('hdfs://user/hadoop/out')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('swift://ex/o')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('hdfs://user/hadoop/out')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)

def test_build_workflow_for_job_hive(self, job_binary): job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={}) job_binary.return_value = {"name": "script.q"} input_data = u.create_data_source('swift://ex/i') output_data = u.create_data_source('swift://ex/o') data_source_urls = {input_data.id: input_data.url, output_data.id: output_data.url} res = workflow_factory.get_workflow_xml( job, u.create_cluster(), job_exec.job_configs, input_data, output_data, 'hadoop', data_source_urls) doc = xml.parseString(res) hive = doc.getElementsByTagName('hive')[0] self.assertEqual('/user/hadoop/conf/hive-site.xml', xmlutils.get_text_from_node(hive, 'job-xml')) configuration = hive.getElementsByTagName('configuration') properties = xmlutils.get_property_dict(configuration[0]) self.assertEqual({'fs.swift.service.sahara.password': '******', 'fs.swift.service.sahara.username': '******'}, properties) self.assertEqual('script.q', xmlutils.get_text_from_node(hive, 'script')) params = xmlutils.get_param_dict(hive) self.assertEqual({'INPUT': 'swift://ex.sahara/i', 'OUTPUT': 'swift://ex.sahara/o'}, params) # testing workflow creation with a proxy domain self.override_config('use_domain_for_proxy_users', True) self.override_config("proxy_user_domain_name", 'sahara_proxy_domain') job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True) res = workflow_factory.get_workflow_xml( job, u.create_cluster(), job_exec.job_configs, input_data, output_data, 'hadoop', data_source_urls) doc = xml.parseString(res) hive = doc.getElementsByTagName('hive')[0] configuration = hive.getElementsByTagName('configuration') properties = xmlutils.get_property_dict(configuration[0]) self.assertEqual({ 'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain', 'fs.swift.service.sahara.trust.id': '0123456789abcdef0123456789abcdef', 'fs.swift.service.sahara.password': '******', 'fs.swift.service.sahara.username': '******'}, properties)
def test_job_type_supported(self, job_get):
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    job_get.return_value = job
    self.assertIsNotNone(job_manager.get_job_engine(u.create_cluster(),
                                                    job_exec))

    job.type = "unsupported_type"
    self.assertIsNone(job_manager.get_job_engine(u.create_cluster(),
                                                 job_exec))

def test_build_workflow_for_job_hive(self, job_binary): job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={}) job_binary.return_value = {"name": "script.q"} input_data = u.create_data_source("swift://ex/i") output_data = u.create_data_source("swift://ex/o") data_source_urls = {input_data.id: input_data.url, output_data.id: output_data.url} res = workflow_factory.get_workflow_xml( job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls ) doc = xml.parseString(res) hive = doc.getElementsByTagName("hive")[0] self.assertEqual("/user/hadoop/conf/hive-site.xml", xmlutils.get_text_from_node(hive, "job-xml")) configuration = hive.getElementsByTagName("configuration") properties = xmlutils.get_property_dict(configuration[0]) self.assertEqual( {"fs.swift.service.sahara.password": "******", "fs.swift.service.sahara.username": "******"}, properties ) self.assertEqual("script.q", xmlutils.get_text_from_node(hive, "script")) params = xmlutils.get_param_dict(hive) self.assertEqual({"INPUT": "swift://ex.sahara/i", "OUTPUT": "swift://ex.sahara/o"}, params) # testing workflow creation with a proxy domain self.override_config("use_domain_for_proxy_users", True) self.override_config("proxy_user_domain_name", "sahara_proxy_domain") job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True) res = workflow_factory.get_workflow_xml( job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls ) doc = xml.parseString(res) hive = doc.getElementsByTagName("hive")[0] configuration = hive.getElementsByTagName("configuration") properties = xmlutils.get_property_dict(configuration[0]) self.assertEqual( { "fs.swift.service.sahara.domain.name": "sahara_proxy_domain", "fs.swift.service.sahara.trust.id": "0123456789abcdef0123456789abcdef", "fs.swift.service.sahara.password": "******", "fs.swift.service.sahara.username": "******", }, properties, )
def test_get_oozie_job_params(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    oozie_params = {'oozie.libpath': '/mylibpath',
                    'oozie.wf.application.path': '/wrong'}
    scheduled_params = {'start': '2015-06-10T06:05Z',
                        'end': '2015-06-10T06:50Z',
                        'frequency': '10'}
    job_dir = '/job_dir'
    job_execution_type = 'workflow'

    job_params = oje._get_oozie_job_params('hadoop', '/tmp', oozie_params,
                                           True, scheduled_params, job_dir,
                                           job_execution_type)
    self.assertEqual('http://localhost:50030', job_params["jobTracker"])
    self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
    self.assertEqual('hadoop', job_params["user.name"])
    self.assertEqual('hdfs://localhost:8020/tmp',
                     job_params['oozie.wf.application.path'])
    self.assertEqual("/mylibpath,hdfs://localhost:8020/user/"
                     "sahara-hbase-lib", job_params['oozie.libpath'])

    # Make sure this doesn't raise an exception
    job_params = oje._get_oozie_job_params('hadoop', '/tmp', {}, True)
    self.assertEqual("hdfs://localhost:8020/user/"
                     "sahara-hbase-lib", job_params['oozie.libpath'])

def _build_workflow_with_conf_common(self, job_type):
    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    job, job_exec = u.create_job_exec(job_type,
                                      configs={"configs": {'c': 'f'}})

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    self.assertIn("""
        <property>
          <name>c</name>
          <value>f</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

def test_run_job(self, exec_get, job, data_source, update, remote,
                 wf_factory, get_ds_urls, prepare_cluster):
    wf_factory.return_value = mock.MagicMock()

    remote_class = mock.MagicMock()
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class

    job_class = mock.MagicMock()
    job.return_value = job_class
    # set the name on the returned job mock, not on the patch object
    # (matches test_prepare_run_job below)
    job_class.name = "myJob"

    source = mock.MagicMock()
    source.url = "localhost"
    data_source.return_value = source

    get_ds_urls.return_value = ('url', 'url')

    oje = FakeOozieJobEngine(u.create_cluster())
    client_class = mock.MagicMock()
    client_class.add_job = mock.MagicMock(return_value=1)
    client_class.get_job_info = mock.MagicMock(
        return_value={'status': 'PENDING'})
    oje.get_client = mock.MagicMock(return_value=client_class)

    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    update.return_value = job_exec

    self.assertEqual((1, 'PENDING', None), oje.run_job(job_exec))

def test_prepare_run_job(self, job, data_source, update, remote,
                         wf_factory, get_ds_urls, prepare_cluster):
    wf_factory.return_value = mock.MagicMock()

    remote_class = mock.MagicMock()
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class

    job_class = mock.MagicMock()
    job_class.name = "myJob"
    job.return_value = job_class

    source = mock.MagicMock()
    source.url = "localhost"
    get_ds_urls.return_value = ('url', 'url')
    data_source.return_value = source

    oje = FakeOozieJobEngine(u.create_cluster())
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    update.return_value = job_exec

    res = oje._prepare_run_job(job_exec)
    self.assertEqual(ctx.ctx(), res['context'])
    self.assertEqual('hadoop', res['hdfs_user'])
    self.assertEqual(job_exec, res['job_execution'])
    self.assertEqual({}, res['oozie_params'])

def test_build_workflow_for_job_hive(self, job_binary):
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE)
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    self.assertIn("""
      <job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <script>script.q</script>
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

def test_build_workflow_for_job_java(self): # If args include swift paths, user and password values # will have to be supplied via configs instead of being # lifted from input or output data sources configs = {sw.HADOOP_SWIFT_USERNAME: '******', sw.HADOOP_SWIFT_PASSWORD: '******'} configs = { 'configs': configs, 'args': ['swift://ex/i', 'output_path'] } job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs) res = workflow_factory.get_workflow_xml( job, u.create_cluster(), job_exec) self.assertIn(""" <configuration> <property> <name>fs.swift.service.sahara.password</name> <value>admin1</value> </property> <property> <name>fs.swift.service.sahara.username</name> <value>admin</value> </property> </configuration> <main-class>%s</main-class> <java-opts>%s</java-opts> <arg>swift://ex.sahara/i</arg> <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
def test_upload_workflow_file(self, remote_get):
    oje = FakeOozieJobEngine(u.create_cluster())
    remote_class = mock.MagicMock()
    remote_class.__exit__.return_value = 'closed'
    remote_get.return_value = remote_class
    res = oje._upload_workflow_file(remote_get, "test", "hadoop.xml",
                                    'hdfs')
    self.assertEqual("test/workflow.xml", res)

def test_hdfs_create_workflow_dir(self, remote):
    remote_class = mock.MagicMock()
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class
    oje = FakeOozieJobEngine(u.create_cluster())
    job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
    res = oje._create_hdfs_workflow_dir(mock.Mock(), job)
    self.assertIn('/user/hadoop/special_name/', res)

def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
    edp_conf_mock.return_value = True
    configs = {"configs": {"edp.java.main_class": "some_main"}}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)

    self.assertIn(
        "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>",
        res)
    self.assertNotIn("some_main", res)

def test_add_postfix(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    self.override_config("job_workflow_postfix", 'caba')
    res = oje._add_postfix('aba')
    self.assertEqual("aba/caba/", res)

    self.override_config("job_workflow_postfix", '')
    res = oje._add_postfix('aba')
    self.assertEqual("aba/", res)

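# --- Illustrative sketch (not sahara source): the shape of _add_postfix
# implied by test_add_postfix above. The real engine reads the
# job_workflow_postfix option from sahara's CONF; here the postfix is a
# plain argument so the snippet stands alone and is directly runnable.
def _add_postfix_sketch(job_dir, postfix):
    def to_dir(name):
        # normalize to a trailing-slash directory path
        return name if name.endswith('/') else name + '/'

    job_dir = to_dir(job_dir)
    if postfix:
        job_dir += to_dir(postfix)
    return job_dir


assert _add_postfix_sketch('aba', 'caba') == 'aba/caba/'
assert _add_postfix_sketch('aba', '') == 'aba/'
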
def test_run_job_unsupported_type(self, cluster_get, job_exec_get,
                                  job_get):
    job, job_exec = u.create_job_exec("unsupported_type")
    job_exec_get.return_value = job_exec
    job_get.return_value = job

    cluster = u.create_cluster()
    cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = cluster

    with testtools.ExpectedException(ex.EDPError):
        job_manager._run_job(job_exec.id)

def test_build_workflow_for_job_shell(self):
    configs = {"configs": {"k1": "v1"},
               "params": {"p1": "v1"},
               "args": ["a1", "a2"]}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, configs)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)

    self.assertIn("<name>k1</name>", res)
    self.assertIn("<value>v1</value>", res)
    self.assertIn("<env-var>p1=v1</env-var>", res)
    self.assertIn("<argument>a1</argument>", res)
    self.assertIn("<argument>a2</argument>", res)

def test_suspend_unsuspendible_job(self, suspend_job_get, cluster_get,
                                   job_exec_get, job_get):
    info = {'status': edp.JOB_STATUS_SUCCEEDED}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
    job_exec_get.return_value = job_exec
    job_get.return_value = job

    cluster = u.create_cluster()
    cluster.status = "Active"
    cluster_get.return_value = cluster

    self.assertEqual(0, suspend_job_get.call_count)

def test_get_job_status(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    client_class = mock.MagicMock()
    client_class.add_job = mock.MagicMock(return_value=1)
    client_class.get_job_info = mock.MagicMock(
        return_value={'status': 'PENDING'})
    oje.get_client = mock.MagicMock(return_value=client_class)

    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
    self.assertIsNone(oje.get_job_status(job_exec))

    job_exec.engine_job_id = 1
    self.assertEqual({'status': 'PENDING'}, oje.get_job_status(job_exec))

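# --- Illustrative sketch (not sahara source): the guard behaviour that
# test_get_job_status pins down. Until an Oozie id has been recorded on
# the execution there is nothing to poll, so the engine answers None
# without touching the client. The client call's argument shape is an
# assumption; in the test above the client is a MagicMock, so any
# signature would pass.
def get_job_status_sketch(engine, job_execution):
    if job_execution.engine_job_id is None:
        return None
    return engine.get_client().get_job_info(job_execution)
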
def test_scheduled_edp_job_run(self, job_exec_get, cluster_get, job_get,
                               run_scheduled_job):
    configs = {"job_execution_info": {"job_execution_type": "scheduled",
                                      "start": "2015-5-15T01:00Z"}}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs)
    job_exec_get.return_value = job_exec
    job_get.return_value = job

    cluster = u.create_cluster()
    cluster.status = "Active"
    cluster_get.return_value = cluster

    job_manager._run_job(job_exec.id)
    self.assertEqual(1, run_scheduled_job.call_count)

def test_cancel_job(self, kill_get, info_get):
    info_get.return_value = {}
    oje = FakeOozieJobEngine(u.create_cluster())
    _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)

    # test cancel job without engine_job_id
    job_exec.engine_job_id = None
    oje.cancel_job(job_exec)
    self.assertEqual(0, kill_get.call_count)

    # test cancel job with engine_job_id
    job_exec.engine_job_id = 123
    oje.cancel_job(job_exec)
    self.assertEqual(1, kill_get.call_count)

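# --- Illustrative sketch (not sahara source): why kill_get stays at zero
# in the first half of test_cancel_job. The kill is guarded on
# engine_job_id; the client method names below (kill_job, get_job_info)
# are stand-ins, not a confirmed Oozie client API.
def cancel_job_sketch(engine, job_execution):
    if job_execution.engine_job_id is not None:
        client = engine.get_client()
        client.kill_job(job_execution)  # only reached with a recorded id
        return client.get_job_info(job_execution)
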
def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
                               remote):
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class
    conductor_raw_data.return_value = 'ok'

    oje = FakeOozieJobEngine(u.create_cluster())
    job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
    res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
    self.assertEqual(['job_prefix/script.pig'], res)

    job, _ = u.create_job_exec(edp.JOB_TYPE_MAPREDUCE)
    res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
    self.assertEqual(['job_prefix/lib/main.jar'], res)

def _build_workflow_common(self, job_type, streaming=False):
    if streaming:
        configs = {'edp.streaming.mapper': '/usr/bin/cat',
                   'edp.streaming.reducer': '/usr/bin/wc'}
        configs = {'configs': configs}
    else:
        configs = {}

    job, job_exec = u.create_job_exec(job_type, configs)

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    if streaming:
        self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

    self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)

def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get,
                              job_get, job_execution_update_get):
    info = {"status": edp.JOB_STATUS_RUNNING}
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
    job_exec_get.return_value = job_exec
    job_get.return_value = job

    cluster = u.create_cluster()
    cluster.status = c_u.CLUSTER_STATUS_ACTIVE
    cluster_get.return_value = cluster

    time_get.return_value = 10000
    job_execution_update_get.return_value = job_exec

    with testtools.ExpectedException(ex.CancelingFailed):
        job_manager.cancel_job(job_exec.id)

def test_get_oozie_job_params(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    oozie_params = {'oozie.libpath': '/mylibpath',
                    'oozie.wf.application.path': '/wrong'}
    job_params = oje._get_oozie_job_params('hadoop', '/tmp', oozie_params)
    self.assertEqual('http://localhost:50030', job_params["jobTracker"])
    self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
    self.assertEqual('hadoop', job_params["user.name"])
    self.assertEqual('hdfs://localhost:8020/tmp',
                     job_params['oozie.wf.application.path'])
    self.assertEqual('/mylibpath', job_params['oozie.libpath'])

    # Make sure this doesn't raise an exception
    job_params = oje._get_oozie_job_params('hadoop', '/tmp', {})
    self.assertNotIn('oozie.libpath', job_params)

def test_get_oozie_job_params(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    oozie_params = {'oozie.libpath': '/mylibpath',
                    'oozie.wf.application.path': '/wrong'}
    job_params = oje._get_oozie_job_params('hadoop', '/tmp', oozie_params,
                                           True)
    self.assertEqual('http://localhost:50030', job_params["jobTracker"])
    self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
    self.assertEqual('hadoop', job_params["user.name"])
    self.assertEqual('hdfs://localhost:8020/tmp',
                     job_params['oozie.wf.application.path'])
    self.assertEqual("/mylibpath,hdfs://localhost:8020/user/"
                     "sahara-hbase-lib", job_params['oozie.libpath'])

    # Make sure this doesn't raise an exception
    job_params = oje._get_oozie_job_params('hadoop', '/tmp', {}, True)
    self.assertEqual("hdfs://localhost:8020/user/"
                     "sahara-hbase-lib", job_params['oozie.libpath'])

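# --- Illustrative sketch (not sahara source): the merging rules the
# test_get_oozie_job_params variants above assert, with the fake engine's
# localhost endpoints and HBase common-lib path hard-coded as assumptions.
# A user-supplied oozie.libpath is prepended to the shared HBase lib, and
# oozie.wf.application.path is always overwritten with the real job dir,
# which is why the '/wrong' value never survives.
def get_oozie_job_params_sketch(hdfs_user, path_to_workflow,
                                oozie_params, use_hbase_lib):
    name_node = 'hdfs://localhost:8020'
    job_params = {'jobTracker': 'http://localhost:50030',
                  'nameNode': name_node,
                  'user.name': hdfs_user}
    extra = dict(oozie_params)  # don't mutate the caller's dict
    libpath = extra.pop('oozie.libpath', None)
    if use_hbase_lib:
        hbase_lib = name_node + '/user/sahara-hbase-lib'
        libpath = ','.join(p for p in (libpath, hbase_lib) if p)
    if libpath:
        job_params['oozie.libpath'] = libpath
    job_params.update(extra)
    # the workflow path is derived from the real upload dir, never taken
    # from user-supplied oozie_params
    job_params['oozie.wf.application.path'] = name_node + path_to_workflow
    return job_params
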
def test__resolve_external_hdfs_urls(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    job_configs = {
        "configs": {
            "mapred.map.tasks": "1",
            "hdfs1": "hdfs://localhost/hdfs1"},
        "args": ["hdfs://localhost/hdfs3", "10"],
        "params": {
            "param1": "10",
            "param2": "hdfs://localhost/hdfs2"
        }
    }
    expected_external_hdfs_urls = ['hdfs://localhost/hdfs1',
                                   'hdfs://localhost/hdfs2',
                                   'hdfs://localhost/hdfs3']
    external_hdfs_urls = oje._resolve_external_hdfs_urls(job_configs)
    self.assertEqual(expected_external_hdfs_urls, external_hdfs_urls)

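# --- Illustrative sketch (not sahara source): the scan that
# test__resolve_external_hdfs_urls describes. Every hdfs:// value found
# in configs, params and args is collected; the configs/params/args visit
# order is an assumption chosen to reproduce the expected list.
def resolve_external_hdfs_urls_sketch(job_configs):
    def hdfs_values(values):
        return [v for v in values
                if isinstance(v, str) and v.startswith('hdfs://')]

    urls = []
    urls += hdfs_values(job_configs.get('configs', {}).values())
    urls += hdfs_values(job_configs.get('params', {}).values())
    urls += hdfs_values(job_configs.get('args', []))
    return urls


assert resolve_external_hdfs_urls_sketch(
    {"configs": {"hdfs1": "hdfs://localhost/hdfs1"},
     "args": ["hdfs://localhost/hdfs3", "10"],
     "params": {"param2": "hdfs://localhost/hdfs2"}}
) == ['hdfs://localhost/hdfs1', 'hdfs://localhost/hdfs2',
      'hdfs://localhost/hdfs3']
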
def test_build_workflow_for_job_java(self):
    # If args include swift paths, user and password values
    # will have to be supplied via configs instead of being
    # lifted from input or output data sources
    configs = {sw.HADOOP_SWIFT_USERNAME: '******',
               sw.HADOOP_SWIFT_PASSWORD: '******'}

    configs = {
        'configs': configs,
        'args': ['swift://ex/i',
                 'output_path']
    }

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

    configs = {
        'configs': {},
        'args': ['swift://ex/i',
                 'output_path']
    }

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                      proxy=True)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

def test_get_plugin(self):
    plugin = job_utils.get_plugin(u.create_cluster())
    self.assertEqual("vanilla", plugin.name)

def test_prepare_cluster(self, ctx):
    ctx.return_value = 'dummy'

    ds_url = "swift://container/input"
    ds = u.create_data_source(ds_url,
                              name="data_source",
                              id=uuidutils.generate_uuid())

    job_configs = {
        'configs': {
            job_utils.DATA_SOURCE_SUBST_NAME: True,
            job_utils.DATA_SOURCE_SUBST_UUID: True
        }
    }

    old_configs = copy.deepcopy(job_configs)
    self.s_type.prepare_cluster(ds, u.create_cluster(),
                                job_configs=job_configs)

    # Swift configs should be filled in since they were blank
    self.assertEqual(
        ds.credentials['user'],
        job_configs['configs']['fs.swift.service.sahara.username'])
    self.assertEqual(
        ds.credentials['password'],
        job_configs['configs']['fs.swift.service.sahara.password'])
    self.assertNotEqual(old_configs, job_configs)

    job_configs['configs'] = {
        'fs.swift.service.sahara.username': '******',
        'fs.swift.service.sahara.password': '******',
        job_utils.DATA_SOURCE_SUBST_NAME: False,
        job_utils.DATA_SOURCE_SUBST_UUID: True}
    old_configs = copy.deepcopy(job_configs)
    self.s_type.prepare_cluster(ds, u.create_cluster(),
                                job_configs=job_configs)

    # Swift configs should not be overwritten
    self.assertEqual(old_configs['configs'], job_configs['configs'])

    job_configs['configs'] = {
        job_utils.DATA_SOURCE_SUBST_NAME: True,
        job_utils.DATA_SOURCE_SUBST_UUID: False}
    job_configs['proxy_configs'] = {
        'proxy_username': '******',
        'proxy_password': '******',
        'proxy_trust_id': 'trustme'}
    old_configs = copy.deepcopy(job_configs)
    self.s_type.prepare_cluster(ds, u.create_cluster(),
                                job_configs=job_configs)

    # Swift configs should be empty and proxy configs should be preserved
    self.assertEqual(old_configs['configs'], job_configs['configs'])
    self.assertEqual(old_configs['proxy_configs'],
                     job_configs['proxy_configs'])

    # If there's no configs do nothing
    job_configs['configs'] = None
    old_configs = copy.deepcopy(job_configs)
    self.s_type.prepare_cluster(ds, u.create_cluster(),
                                job_configs=job_configs)
    self.assertEqual(old_configs, job_configs)

    # If it's a FrozenDict do nothing
    job_configs = {
        'configs': {
            job_utils.DATA_SOURCE_SUBST_NAME: True,
            job_utils.DATA_SOURCE_SUBST_UUID: True
        }
    }
    old_configs = copy.deepcopy(job_configs)
    job_configs = FrozenDict(job_configs)
    self.s_type.prepare_cluster(ds, u.create_cluster(),
                                job_configs=job_configs)
    self.assertEqual(old_configs, job_configs)

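# --- Illustrative sketch (not sahara source): the credential rule the
# first four passes of test_prepare_cluster verify, with all names below
# as stand-ins for the real data-source type's internals. Data-source
# credentials are injected only when proxy configs are absent and the
# caller has not already supplied the swift keys, so explicit values are
# never clobbered. The FrozenDict no-op pass is handled by a mutability
# check in the real code and is elided here.
def configure_swift_credentials_sketch(job_configs, credentials):
    configs = job_configs.get('configs')
    if configs is None or 'proxy_configs' in job_configs:
        return job_configs  # nothing to fill, or proxy auth is in charge
    configs.setdefault('fs.swift.service.sahara.username',
                       credentials['user'])
    configs.setdefault('fs.swift.service.sahara.password',
                       credentials['password'])
    return job_configs
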
def _build_workflow_common(self, job_type, streaming=False, proxy=False):
    if streaming:
        configs = {'edp.streaming.mapper': '/usr/bin/cat',
                   'edp.streaming.reducer': '/usr/bin/wc'}
        configs = {'configs': configs}
    else:
        configs = {}

    job, job_exec = u.create_job_exec(job_type, configs)

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec, input_data, output_data,
        'hadoop')

    if streaming:
        self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

    self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

    if not proxy:
        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
    else:
        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name",
                             'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(job_type, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)

def test_get_oozie_job_params(self):
    oje = FakeOozieJobEngine(u.create_cluster())
    job_params = oje._get_oozie_job_params('hadoop', '/tmp')
    self.assertEqual('http://localhost:50030', job_params["jobTracker"])
    self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
    self.assertEqual('hadoop', job_params["user.name"])

def test_build_workflow_swift_configs(self, job_binary):
    # Test that swift configs come from either input or output data
    # sources
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
    job_binary.return_value = {"name": "script.pig"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

    job, job_exec = u.create_job_exec(
        edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
    input_data = u.create_data_source('hdfs://user/hadoop/in')
    output_data = u.create_data_source('hdfs://user/hadoop/out')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)

def _build_workflow_common(self, job_type, streaming=False, proxy=False):
    if streaming:
        configs = {'edp.streaming.mapper': '/usr/bin/cat',
                   'edp.streaming.reducer': '/usr/bin/wc'}
        configs = {'configs': configs}
    else:
        configs = {}

    job, job_exec = u.create_job_exec(job_type, configs)

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    if streaming:
        self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

    self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

    self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

    if not proxy:
        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
    else:
        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name",
                             'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(job_type, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)

def test_build_workflow_for_job_java(self):
    # If args include swift paths, user and password values
    # will have to be supplied via configs instead of being
    # lifted from input or output data sources
    configs = {sw.HADOOP_SWIFT_USERNAME: '******',
               sw.HADOOP_SWIFT_PASSWORD: '******'}

    configs = {
        'configs': configs,
        'args': ['swift://ex/i',
                 'output_path']
    }

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

    configs = {
        'configs': {},
        'args': ['swift://ex/i',
                 'output_path']
    }

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                      proxy=True)
    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs)

    self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
