def test_build_workflow_for_job_hive(self, job_binary):
    """Check the Hive workflow XML, plain and with a proxy domain.

    The workflow is generated once for a job execution created with
    empty configs and once for one created with ``proxy=True`` after the
    proxy-domain options have been overridden.  ``job_binary`` is given
    a return value naming the binary ``script.q`` (presumably a mock
    injected by a patch decorator outside this view).
    """
    def first_hive_node(workflow_xml):
        # Only the first <hive> element of the document is inspected.
        return xml.parseString(workflow_xml).getElementsByTagName('hive')[0]

    def configuration_properties(node):
        # Properties are read from the first <configuration> child.
        config_nodes = node.getElementsByTagName('configuration')
        return xmlutils.get_property_dict(config_nodes[0])

    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')

    hive = first_hive_node(
        workflow_factory.get_workflow_xml(job, u.create_cluster(), job_exec,
                                          input_data, output_data, 'hadoop'))

    self.assertEqual(xmlutils.get_text_from_node(hive, 'job-xml'),
                     '/user/hadoop/conf/hive-site.xml')

    expected_properties = {
        'fs.swift.service.sahara.password': '******',
        'fs.swift.service.sahara.username': '******',
    }
    self.assertEqual(expected_properties, configuration_properties(hive))

    self.assertEqual(xmlutils.get_text_from_node(hive, 'script'),
                     'script.q')

    expected_params = {
        'INPUT': 'swift://ex.sahara/i',
        'OUTPUT': 'swift://ex.sahara/o',
    }
    self.assertEqual(expected_params, xmlutils.get_param_dict(hive))

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

    hive = first_hive_node(
        workflow_factory.get_workflow_xml(job, u.create_cluster(), job_exec,
                                          input_data, output_data, 'hadoop'))

    expected_proxy_properties = {
        'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
        'fs.swift.service.sahara.trust.id':
            '0123456789abcdef0123456789abcdef',
        'fs.swift.service.sahara.password': '******',
        'fs.swift.service.sahara.username': '******',
    }
    self.assertEqual(expected_proxy_properties,
                     configuration_properties(hive))
def test_generate_xml_configs(self, auth_url):
    """Verify how swift settings appear in the generated core-site XML.

    Three cases are exercised: the swift defaults are present, a
    user-supplied value overrides the default, and the swift keys are
    absent when 'Enable Swift' is set to False.  ``auth_url`` is given a
    fixed return value (presumably a mock injected by a patch decorator
    outside this view).
    """
    def core_site_properties(cluster_configs):
        # Generate the configs and read back the properties of the first
        # <configuration> element in the core-site document.
        generated = c_helper.generate_xml_configs(
            cluster_configs, ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(generated['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        return xmlutils.get_property_dict(configuration[0])

    def assert_contains_subset(expected, actual):
        # assertDictContainsSubset is deprecated since Python 3.2 and was
        # removed in 3.12.  Overlaying ``expected`` on a copy of
        # ``actual`` leaves it unchanged only when every expected item
        # is already present with the same value.
        merged = dict(actual)
        merged.update(expected)
        self.assertEqual(merged, actual)

    auth_url.return_value = "http://localhost:5000/v2/"

    # Make a dict of swift configs to verify generated values
    swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

    # Make sure that all the swift configs are in core-site
    properties = core_site_properties({})
    assert_contains_subset(swift_vals, properties)

    # Make sure that user values have precedence over defaults
    properties = core_site_properties(
        {'HDFS': {'fs.swift.service.sahara.tenant': 'fred'}})
    mod_swift_vals = copy.copy(swift_vals)
    mod_swift_vals['fs.swift.service.sahara.tenant'] = 'fred'
    assert_contains_subset(mod_swift_vals, properties)

    # Make sure that swift configs are left out if not enabled
    properties = core_site_properties(
        {'HDFS': {'fs.swift.service.sahara.tenant': 'fred'},
         'general': {'Enable Swift': False}})
    for key in mod_swift_vals:
        self.assertNotIn(key, properties)
def test_build_workflow_for_job_hive(self, job_binary):
    """Check the Hive workflow XML built from job configs and source URLs.

    The workflow is generated for a job execution created with empty
    configs, then again for one created with ``proxy=True`` once the
    proxy-domain options are overridden.  Both runs share the same input
    and output data sources and the same id-to-url mapping.
    """
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source('swift://ex/i')
    output_data = u.create_data_source('swift://ex/o')
    data_source_urls = {input_data.id: input_data.url,
                        output_data.id: output_data.url}

    def render_hive_action(hive_job, execution):
        # Build the workflow and return the first <hive> element of it.
        workflow = workflow_factory.get_workflow_xml(
            hive_job, u.create_cluster(), execution.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)
        return xml.parseString(workflow).getElementsByTagName('hive')[0]

    def action_properties(hive_action):
        # Properties come from the first <configuration> child.
        config_nodes = hive_action.getElementsByTagName('configuration')
        return xmlutils.get_property_dict(config_nodes[0])

    hive = render_hive_action(job, job_exec)

    self.assertEqual('/user/hadoop/conf/hive-site.xml',
                     xmlutils.get_text_from_node(hive, 'job-xml'))

    plain_credentials = {
        'fs.swift.service.sahara.password': '******',
        'fs.swift.service.sahara.username': '******',
    }
    self.assertEqual(plain_credentials, action_properties(hive))

    self.assertEqual('script.q',
                     xmlutils.get_text_from_node(hive, 'script'))

    self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                      'OUTPUT': 'swift://ex.sahara/o'},
                     xmlutils.get_param_dict(hive))

    # testing workflow creation with a proxy domain
    self.override_config('use_domain_for_proxy_users', True)
    self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

    hive = render_hive_action(job, job_exec)

    proxy_credentials = {
        'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
        'fs.swift.service.sahara.trust.id':
            '0123456789abcdef0123456789abcdef',
        'fs.swift.service.sahara.password': '******',
        'fs.swift.service.sahara.username': '******',
    }
    self.assertEqual(proxy_credentials, action_properties(hive))
def test_build_workflow_for_job_hive(self, job_binary):
    """Build the Hive workflow twice and inspect its ``<hive>`` element.

    First pass: job execution created with empty configs; the job-xml
    path, swift credentials, script name and INPUT/OUTPUT params are
    asserted.  Second pass: the proxy-domain options are overridden and
    the execution is created with ``proxy=True``; only the configuration
    properties are asserted.
    """
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
    job_binary.return_value = {"name": "script.q"}

    input_data = u.create_data_source("swift://ex/i")
    output_data = u.create_data_source("swift://ex/o")
    data_source_urls = {
        input_data.id: input_data.url,
        output_data.id: output_data.url,
    }

    def build_workflow(workflow_job, workflow_exec):
        # Same arguments for both passes except the job and its configs.
        return workflow_factory.get_workflow_xml(
            workflow_job,
            u.create_cluster(),
            workflow_exec.job_configs,
            input_data,
            output_data,
            "hadoop",
            data_source_urls,
        )

    def hive_element_of(workflow_xml):
        document = xml.parseString(workflow_xml)
        return document.getElementsByTagName("hive")[0]

    def properties_of(hive_element):
        first_config = hive_element.getElementsByTagName("configuration")[0]
        return xmlutils.get_property_dict(first_config)

    hive = hive_element_of(build_workflow(job, job_exec))

    job_xml = xmlutils.get_text_from_node(hive, "job-xml")
    self.assertEqual("/user/hadoop/conf/hive-site.xml", job_xml)

    self.assertEqual(
        {
            "fs.swift.service.sahara.password": "******",
            "fs.swift.service.sahara.username": "******",
        },
        properties_of(hive),
    )

    script = xmlutils.get_text_from_node(hive, "script")
    self.assertEqual("script.q", script)

    self.assertEqual(
        {"INPUT": "swift://ex.sahara/i", "OUTPUT": "swift://ex.sahara/o"},
        xmlutils.get_param_dict(hive),
    )

    # testing workflow creation with a proxy domain
    self.override_config("use_domain_for_proxy_users", True)
    self.override_config("proxy_user_domain_name", "sahara_proxy_domain")
    job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

    hive = hive_element_of(build_workflow(job, job_exec))

    self.assertEqual(
        {
            "fs.swift.service.sahara.domain.name": "sahara_proxy_domain",
            "fs.swift.service.sahara.trust.id":
                "0123456789abcdef0123456789abcdef",
            "fs.swift.service.sahara.password": "******",
            "fs.swift.service.sahara.username": "******",
        },
        properties_of(hive),
    )
def test_generate_xml_configs(self, auth_url):
    """Check swift settings in the core-site XML under three inputs.

    Covers empty cluster configs (swift defaults expected), a
    user-supplied tenant (override expected) and 'Enable Swift' set to
    False (no swift keys expected).  ``auth_url`` is given a fixed
    return value (presumably a mock injected by a patch decorator
    outside this view).
    """
    def read_core_site(generated):
        # Properties of the first <configuration> element in core-site.
        doc = xml.parseString(generated['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        return xmlutils.get_property_dict(configuration[0])

    def check_subset(subset, full):
        # Stand-in for assertDictContainsSubset, which is deprecated
        # since Python 3.2 and removed in 3.12: updating a copy of
        # ``full`` with ``subset`` is a no-op only if ``full`` already
        # holds every item of ``subset``.
        overlaid = dict(full)
        overlaid.update(subset)
        self.assertEqual(overlaid, full)

    auth_url.return_value = "http://localhost:5000/v2/"

    # Make a dict of swift configs to verify generated values
    swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

    # Make sure that all the swift configs are in core-site
    c = c_helper.generate_xml_configs({}, ['/mnt/one'], 'localhost', None)
    properties = read_core_site(c)
    check_subset(swift_vals, properties)

    # Make sure that user values have precedence over defaults
    c = c_helper.generate_xml_configs(
        {'HDFS': {'fs.swift.service.sahara.tenant': 'fred'}},
        ['/mnt/one'], 'localhost', None)
    properties = read_core_site(c)
    mod_swift_vals = copy.copy(swift_vals)
    mod_swift_vals['fs.swift.service.sahara.tenant'] = 'fred'
    check_subset(mod_swift_vals, properties)

    # Make sure that swift configs are left out if not enabled
    c = c_helper.generate_xml_configs(
        {
            'HDFS': {'fs.swift.service.sahara.tenant': 'fred'},
            'general': {'Enable Swift': False},
        },
        ['/mnt/one'], 'localhost', None)
    properties = read_core_site(c)
    for key in mod_swift_vals:
        self.assertNotIn(key, properties)
def test_generate_xml_configs(self, auth_url):
    """Exercise core-site generation with and without swift settings.

    Asserts that the swift defaults appear for empty cluster configs,
    that a user-supplied tenant replaces the default, and that no swift
    key is emitted when "Enable Swift" is False.  ``auth_url`` is given
    a fixed return value (presumably a mock injected by a patch
    decorator outside this view).
    """
    def generated_properties(cluster_configs):
        # Run the generator and return the property dict of the first
        # <configuration> element of the core-site document.
        configs = c_helper.generate_xml_configs(
            cluster_configs, ["/mnt/one"], "localhost", None
        )
        doc = xml.parseString(configs["core-site"])
        configuration = doc.getElementsByTagName("configuration")
        return xmlutils.get_property_dict(configuration[0])

    def assert_subset(expected, actual):
        # Replaces assertDictContainsSubset (deprecated since Python 3.2,
        # removed in 3.12).  The merged copy equals ``actual`` only when
        # every expected key is present with the expected value.
        merged = dict(actual)
        merged.update(expected)
        self.assertEqual(merged, actual)

    auth_url.return_value = "http://localhost:5000/v2/"

    # Make a dict of swift configs to verify generated values
    swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

    # Make sure that all the swift configs are in core-site
    assert_subset(swift_vals, generated_properties({}))

    # Make sure that user values have precedence over defaults
    properties = generated_properties(
        {"HDFS": {"fs.swift.service.sahara.tenant": "fred"}}
    )
    mod_swift_vals = copy.copy(swift_vals)
    mod_swift_vals["fs.swift.service.sahara.tenant"] = "fred"
    assert_subset(mod_swift_vals, properties)

    # Make sure that swift configs are left out if not enabled
    properties = generated_properties(
        {
            "HDFS": {"fs.swift.service.sahara.tenant": "fred"},
            "general": {"Enable Swift": False},
        }
    )
    for key in mod_swift_vals:
        self.assertNotIn(key, properties)
def get_property_dict(elem, **kwargs):
    """Return ``xmlutils.get_property_dict(elem)``.

    Extra keyword arguments are accepted but are not forwarded to
    ``xmlutils``; they have no effect on the result.
    """
    property_dict = xmlutils.get_property_dict(elem)
    return property_dict