def test_build_workflow_for_job_hive(self, job_binary):
    # job_binary is the job-binary getter patched in by a mock.patch
    # decorator (not shown here); the Hive script name is all this test needs.
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    doc = xml.parseString(res)
    hive = doc.getElementsByTagName('hive')[0]
    self.assertEqual('/user/hadoop/conf/hive-site.xml',
                     xmlutils.get_text_from_node(hive, 'job-xml'))

    configuration = hive.getElementsByTagName('configuration')
    properties = xmlutils.get_property_dict(configuration[0])
    self.assertEqual({'fs.swift.service.sahara.password': '******',
                      'fs.swift.service.sahara.username': '******'},
                     properties)

    self.assertEqual('script.q',
                     xmlutils.get_text_from_node(hive, 'script'))

    params = xmlutils.get_param_dict(hive)
    self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                      'OUTPUT': 'swift://ex.sahara/o'}, params)

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

    res = workflow_factory.get_workflow_xml(
        job, u.create_cluster(), job_exec.job_configs,
        input_data, output_data, 'hadoop', data_source_urls)

    doc = xml.parseString(res)
    hive = doc.getElementsByTagName('hive')[0]
    configuration = hive.getElementsByTagName('configuration')
    properties = xmlutils.get_property_dict(configuration[0])
    self.assertEqual(
        {'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
         'fs.swift.service.sahara.trust.id':
             '0123456789abcdef0123456789abcdef',
         'fs.swift.service.sahara.password': '******',
         'fs.swift.service.sahara.username': '******'},
        properties)
def load_xml_file(path):
    """Parse a Hadoop-style XML file of <property> elements into a dict,
    mapping each property name to its adjusted, whitespace-stripped value.
    """
    kv_mapper = lambda i: (x.get_text_from_node(i, 'name'),
                           x._adjust_field(x.get_text_from_node(i, 'value')))
    strip_mapper = lambda i: (i[0].strip(), i[1].strip())
    props = x.load_xml_document(path).getElementsByTagName('property')
    return dict(map(strip_mapper, map(kv_mapper, props)))
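# A minimal usage sketch (the file path and property name below are
# hypothetical, chosen only for illustration). For a Hadoop-style
# configuration file containing, e.g.:
#   <property>
#     <name>fs.swift.service.sahara.username</name>
#     <value>swift_user</value>
#   </property>
# load_xml_file would return:
#   {'fs.swift.service.sahara.username': 'swift_user'}
#
# defaults = load_xml_file('resources/core-default.xml')
# username = defaults.get('fs.swift.service.sahara.username')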