def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec, input_data,
                                                output_data, 'hadoop')

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        self.assertEqual(xmlutils.get_text_from_node(hive, 'job-xml'),
                         '/user/hadoop/conf/hive-site.xml')

        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                'fs.swift.service.sahara.password': '******',
                'fs.swift.service.sahara.username': '******'
            }, properties)

        self.assertEqual(xmlutils.get_text_from_node(hive, 'script'),
                         'script.q')

        params = xmlutils.get_param_dict(hive)
        self.assertEqual(
            {
                'INPUT': 'swift://ex.sahara/i',
                'OUTPUT': 'swift://ex.sahara/o'
            }, params)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec, input_data,
                                                output_data, 'hadoop')

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                'fs.swift.service.sahara.domain.name':
                'sahara_proxy_domain',
                'fs.swift.service.sahara.trust.id':
                '0123456789abcdef0123456789abcdef',
                'fs.swift.service.sahara.password':
                '******',
                'fs.swift.service.sahara.username':
                '******'
            }, properties)
    def test_generate_xml_configs(self, auth_url):
        auth_url.return_value = "http://localhost:5000/v2/"

        # Make a dict of swift configs to verify generated values
        swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

        # Make sure that all the swift configs are in core-site
        c = c_helper.generate_xml_configs({}, ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertDictContainsSubset(swift_vals, properties)

        # Make sure that user values have precedence over defaults
        c = c_helper.generate_xml_configs(
            {'HDFS': {'fs.swift.service.sahara.tenant': 'fred'}},
            ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        mod_swift_vals = copy.copy(swift_vals)
        mod_swift_vals['fs.swift.service.sahara.tenant'] = 'fred'
        self.assertDictContainsSubset(mod_swift_vals, properties)

        # Make sure that swift confgs are left out if not enabled
        c = c_helper.generate_xml_configs(
            {'HDFS': {'fs.swift.service.sahara.tenant': 'fred'},
             'general': {'Enable Swift': False}},
            ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        for key in mod_swift_vals.keys():
            self.assertNotIn(key, properties)
Exemple #3
0
    def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        self.assertEqual('/user/hadoop/conf/hive-site.xml',
                         xmlutils.get_text_from_node(hive, 'job-xml'))

        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual({'fs.swift.service.sahara.password': '******',
                          'fs.swift.service.sahara.username': '******'},
                         properties)

        self.assertEqual('script.q',
                         xmlutils.get_text_from_node(hive, 'script'))

        params = xmlutils.get_param_dict(hive)
        self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                          'OUTPUT': 'swift://ex.sahara/o'}, params)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual({
            'fs.swift.service.sahara.domain.name':
            'sahara_proxy_domain',

            'fs.swift.service.sahara.trust.id':
            '0123456789abcdef0123456789abcdef',

            'fs.swift.service.sahara.password':
            '******',

            'fs.swift.service.sahara.username':
            '******'}, properties)
Exemple #4
0
    def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source("swift://ex/i")
        output_data = u.create_data_source("swift://ex/o")
        data_source_urls = {input_data.id: input_data.url, output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls
        )

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName("hive")[0]
        self.assertEqual("/user/hadoop/conf/hive-site.xml", xmlutils.get_text_from_node(hive, "job-xml"))

        configuration = hive.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {"fs.swift.service.sahara.password": "******", "fs.swift.service.sahara.username": "******"}, properties
        )

        self.assertEqual("script.q", xmlutils.get_text_from_node(hive, "script"))

        params = xmlutils.get_param_dict(hive)
        self.assertEqual({"INPUT": "swift://ex.sahara/i", "OUTPUT": "swift://ex.sahara/o"}, params)

        # testing workflow creation with a proxy domain
        self.override_config("use_domain_for_proxy_users", True)
        self.override_config("proxy_user_domain_name", "sahara_proxy_domain")

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls
        )

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName("hive")[0]
        configuration = hive.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                "fs.swift.service.sahara.domain.name": "sahara_proxy_domain",
                "fs.swift.service.sahara.trust.id": "0123456789abcdef0123456789abcdef",
                "fs.swift.service.sahara.password": "******",
                "fs.swift.service.sahara.username": "******",
            },
            properties,
        )
Exemple #5
0
    def test_generate_xml_configs(self, auth_url):
        auth_url.return_value = "http://localhost:5000/v2/"

        # Make a dict of swift configs to verify generated values
        swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

        # Make sure that all the swift configs are in core-site
        c = c_helper.generate_xml_configs({}, ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertDictContainsSubset(swift_vals, properties)

        # Make sure that user values have precedence over defaults
        c = c_helper.generate_xml_configs(
            {'HDFS': {
                'fs.swift.service.sahara.tenant': 'fred'
            }}, ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        mod_swift_vals = copy.copy(swift_vals)
        mod_swift_vals['fs.swift.service.sahara.tenant'] = 'fred'
        self.assertDictContainsSubset(mod_swift_vals, properties)

        # Make sure that swift confgs are left out if not enabled
        c = c_helper.generate_xml_configs(
            {
                'HDFS': {
                    'fs.swift.service.sahara.tenant': 'fred'
                },
                'general': {
                    'Enable Swift': False
                }
            }, ['/mnt/one'], 'localhost', None)
        doc = xml.parseString(c['core-site'])
        configuration = doc.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        for key in mod_swift_vals.keys():
            self.assertNotIn(key, properties)
    def test_generate_xml_configs(self, auth_url):
        auth_url.return_value = "http://localhost:5000/v2/"

        # Make a dict of swift configs to verify generated values
        swift_vals = c_helper.extract_name_values(swift.get_swift_configs())

        # Make sure that all the swift configs are in core-site
        c = c_helper.generate_xml_configs({}, ["/mnt/one"], "localhost", None)
        doc = xml.parseString(c["core-site"])
        configuration = doc.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertDictContainsSubset(swift_vals, properties)

        # Make sure that user values have precedence over defaults
        c = c_helper.generate_xml_configs(
            {"HDFS": {"fs.swift.service.sahara.tenant": "fred"}}, ["/mnt/one"], "localhost", None
        )
        doc = xml.parseString(c["core-site"])
        configuration = doc.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        mod_swift_vals = copy.copy(swift_vals)
        mod_swift_vals["fs.swift.service.sahara.tenant"] = "fred"
        self.assertDictContainsSubset(mod_swift_vals, properties)

        # Make sure that swift confgs are left out if not enabled
        c = c_helper.generate_xml_configs(
            {"HDFS": {"fs.swift.service.sahara.tenant": "fred"}, "general": {"Enable Swift": False}},
            ["/mnt/one"],
            "localhost",
            None,
        )
        doc = xml.parseString(c["core-site"])
        configuration = doc.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        for key in mod_swift_vals.keys():
            self.assertNotIn(key, properties)
Exemple #7
0
def get_property_dict(elem, **kwargs):
    return xmlutils.get_property_dict(elem)