Code example #1
File: test_job_manager.py  Project: suriya2612/sahara
    def test_build_workflow_for_job_hive(self, job_binary):
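        # Note: job_binary is presumably a mock injected by a @mock.patch
        # decorator that is not shown in this snippet; its return_value
        # stands in for the job's main script binary.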

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec, input_data,
                                                output_data, 'hadoop')

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        self.assertEqual(xmlutils.get_text_from_node(hive, 'job-xml'),
                         '/user/hadoop/conf/hive-site.xml')

        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                'fs.swift.service.sahara.password': '******',
                'fs.swift.service.sahara.username': '******'
            }, properties)

        self.assertEqual(xmlutils.get_text_from_node(hive, 'script'),
                         'script.q')

        params = xmlutils.get_param_dict(hive)
        self.assertEqual(
            {
                'INPUT': 'swift://ex.sahara/i',
                'OUTPUT': 'swift://ex.sahara/o'
            }, params)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec, input_data,
                                                output_data, 'hadoop')

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                'fs.swift.service.sahara.domain.name':
                'sahara_proxy_domain',
                'fs.swift.service.sahara.trust.id':
                '0123456789abcdef0123456789abcdef',
                'fs.swift.service.sahara.password':
                '******',
                'fs.swift.service.sahara.username':
                '******'
            }, properties)
Code example #2
File: test_job_manager.py  Project: thefuyang/sahara
    def test_job_type_supported(self, job_get):
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_get.return_value = job
        self.assertIsNotNone(job_manager._get_job_engine(u.create_cluster(), job_exec))

        job.type = "unsupported_type"
        self.assertIsNone(job_manager._get_job_engine(u.create_cluster(), job_exec))
Code example #3
    def test_build_workflow_for_job_pig(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        self.assertIn("<script>script.pig</script>", res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>""", res)
Code example #4
File: test_job_manager.py  Project: rogeryu27/sahara
    def test_build_workflow_for_job_pig(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        self.assertIn("<script>script.pig</script>", res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>""", res)
Code example #5
File: test_job_manager.py  Project: degorenko/sahara
    def test_build_workflow_swift_configs(self, job_binary):

        # Test that swift configs come from either input or output data sources
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('hdfs://user/hadoop/out')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        input_data = u.create_data_source('hdfs://user/hadoop/in')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
        input_data = u.create_data_source('hdfs://user/hadoop/in')
        output_data = u.create_data_source('hdfs://user/hadoop/out')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)
Code example #6
File: test_job_manager.py  Project: rogeryu27/sahara
    def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        self.assertEqual('/user/hadoop/conf/hive-site.xml',
                         xmlutils.get_text_from_node(hive, 'job-xml'))

        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual({'fs.swift.service.sahara.password': '******',
                          'fs.swift.service.sahara.username': '******'},
                         properties)

        self.assertEqual('script.q',
                         xmlutils.get_text_from_node(hive, 'script'))

        params = xmlutils.get_param_dict(hive)
        self.assertEqual({'INPUT': 'swift://ex.sahara/i',
                          'OUTPUT': 'swift://ex.sahara/o'}, params)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual({
            'fs.swift.service.sahara.domain.name':
            'sahara_proxy_domain',

            'fs.swift.service.sahara.trust.id':
            '0123456789abcdef0123456789abcdef',

            'fs.swift.service.sahara.password':
            '******',

            'fs.swift.service.sahara.username':
            '******'}, properties)
Code example #7
    def test_job_type_supported(self, job_get):
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_get.return_value = job
        self.assertIsNotNone(job_manager.get_job_engine(u.create_cluster(),
                                                        job_exec))

        job.type = "unsupported_type"
        self.assertIsNone(job_manager.get_job_engine(u.create_cluster(),
                                                     job_exec))
Code example #8
File: test_job_manager.py  Project: thefuyang/sahara
    def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source("swift://ex/i")
        output_data = u.create_data_source("swift://ex/o")
        data_source_urls = {input_data.id: input_data.url, output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls
        )

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName("hive")[0]
        self.assertEqual("/user/hadoop/conf/hive-site.xml", xmlutils.get_text_from_node(hive, "job-xml"))

        configuration = hive.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {"fs.swift.service.sahara.password": "******", "fs.swift.service.sahara.username": "******"}, properties
        )

        self.assertEqual("script.q", xmlutils.get_text_from_node(hive, "script"))

        params = xmlutils.get_param_dict(hive)
        self.assertEqual({"INPUT": "swift://ex.sahara/i", "OUTPUT": "swift://ex.sahara/o"}, params)

        # testing workflow creation with a proxy domain
        self.override_config("use_domain_for_proxy_users", True)
        self.override_config("proxy_user_domain_name", "sahara_proxy_domain")

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs, input_data, output_data, "hadoop", data_source_urls
        )

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName("hive")[0]
        configuration = hive.getElementsByTagName("configuration")
        properties = xmlutils.get_property_dict(configuration[0])
        self.assertEqual(
            {
                "fs.swift.service.sahara.domain.name": "sahara_proxy_domain",
                "fs.swift.service.sahara.trust.id": "0123456789abcdef0123456789abcdef",
                "fs.swift.service.sahara.password": "******",
                "fs.swift.service.sahara.username": "******",
            },
            properties,
        )
Code example #9
File: test_oozie.py  Project: gongwayne/Openstack
    def test_get_oozie_job_params(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        oozie_params = {'oozie.libpath': '/mylibpath',
                        'oozie.wf.application.path': '/wrong'}
        scheduled_params = {'start': '2015-06-10T06:05Z',
                            'end': '2015-06-10T06:50Z',
                            'frequency': '10'}
        job_dir = '/job_dir'
        job_execution_type = 'workflow'
        job_params = oje._get_oozie_job_params('hadoop',
                                               '/tmp', oozie_params, True,
                                               scheduled_params, job_dir,
                                               job_execution_type)
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])
        self.assertEqual('hdfs://localhost:8020/tmp',
                         job_params['oozie.wf.application.path'])
        self.assertEqual("/mylibpath,hdfs://localhost:8020/user/"
                         "sahara-hbase-lib", job_params['oozie.libpath'])

        # Make sure this doesn't raise an exception
        job_params = oje._get_oozie_job_params('hadoop',
                                               '/tmp', {}, True)
        self.assertEqual("hdfs://localhost:8020/user/"
                         "sahara-hbase-lib", job_params['oozie.libpath'])
Code example #10
File: test_job_manager.py  Project: stannie42/sahara
    def _build_workflow_with_conf_common(self, job_type):

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        job, job_exec = u.create_job_exec(job_type,
                                          configs={"configs": {'c': 'f'}})

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
        <property>
          <name>c</name>
          <value>f</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)
Code example #11
File: test_oozie.py  Project: butterfy76/sahara
    def test_get_oozie_job_params(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        oozie_params = {
            'oozie.libpath': '/mylibpath',
            'oozie.wf.application.path': '/wrong'
        }
        scheduled_params = {
            'start': '2015-06-10T06:05Z',
            'end': '2015-06-10T06:50Z',
            'frequency': '10'
        }
        job_dir = '/job_dir'
        job_execution_type = 'workflow'
        job_params = oje._get_oozie_job_params('hadoop', '/tmp', oozie_params,
                                               True, scheduled_params, job_dir,
                                               job_execution_type)
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])
        self.assertEqual('hdfs://localhost:8020/tmp',
                         job_params['oozie.wf.application.path'])
        self.assertEqual(
            "/mylibpath,hdfs://localhost:8020/user/"
            "sahara-hbase-lib", job_params['oozie.libpath'])

        # Make sure this doesn't raise an exception
        job_params = oje._get_oozie_job_params('hadoop', '/tmp', {}, True)
        self.assertEqual("hdfs://localhost:8020/user/"
                         "sahara-hbase-lib", job_params['oozie.libpath'])
Code example #12
    def test_run_job(self, exec_get, job, data_source, update, remote,
                     wf_factory, get_ds_urls, prepare_cluster):
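        # Note: the long parameter list is presumably filled by stacked
        # @mock.patch decorators (conductor, remote, workflow factory and
        # data-source helpers) that are not shown in this snippet.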
        wf_factory.return_value = mock.MagicMock()
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        job_class = mock.MagicMock()
        job.return_value = job_class
        job.name = "myJob"

        source = mock.MagicMock()
        source.url = "localhost"
        data_source.return_value = source

        get_ds_urls.return_value = ('url', 'url')

        oje = FakeOozieJobEngine(u.create_cluster())
        client_class = mock.MagicMock()
        client_class.add_job = mock.MagicMock(return_value=1)
        client_class.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})
        oje.get_client = mock.MagicMock(return_value=client_class)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        self.assertEqual((1, 'PENDING', None), oje.run_job(job_exec))
Code example #13
    def test_prepare_run_job(self, job, data_source, update, remote,
                             wf_factory, get_ds_urls, prepare_cluster):
        wf_factory.return_value = mock.MagicMock()

        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        job_class = mock.MagicMock()
        job_class.name = "myJob"
        job.return_value = job_class

        source = mock.MagicMock()
        source.url = "localhost"

        get_ds_urls.return_value = ('url', 'url')

        data_source.return_value = source
        oje = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        res = oje._prepare_run_job(job_exec)
        self.assertEqual(ctx.ctx(), res['context'])
        self.assertEqual('hadoop', res['hdfs_user'])
        self.assertEqual(job_exec, res['job_execution'])
        self.assertEqual({}, res['oozie_params'])
Code example #14
File: test_oozie.py  Project: openstack/sahara
    def test_run_job(self, exec_get, job, data_source,
                     update, remote, wf_factory, get_ds_urls,
                     prepare_cluster):
        wf_factory.return_value = mock.MagicMock()
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        job_class = mock.MagicMock()
        job.return_value = job_class
        job.name = "myJob"

        source = mock.MagicMock()
        source.url = "localhost"
        data_source.return_value = source

        get_ds_urls.return_value = ('url', 'url')

        oje = FakeOozieJobEngine(u.create_cluster())
        client_class = mock.MagicMock()
        client_class.add_job = mock.MagicMock(return_value=1)
        client_class.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})
        oje.get_client = mock.MagicMock(return_value=client_class)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        self.assertEqual((1, 'PENDING', None), oje.run_job(job_exec))
Code example #15
File: test_oozie.py  Project: openstack/sahara
    def test_prepare_run_job(self, job, data_source, update,
                             remote, wf_factory, get_ds_urls,
                             prepare_cluster):
        wf_factory.return_value = mock.MagicMock()

        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        job_class = mock.MagicMock()
        job_class.name = "myJob"
        job.return_value = job_class

        source = mock.MagicMock()
        source.url = "localhost"

        get_ds_urls.return_value = ('url', 'url')

        data_source.return_value = source
        oje = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        res = oje._prepare_run_job(job_exec)
        self.assertEqual(ctx.ctx(), res['context'])
        self.assertEqual('hadoop', res['hdfs_user'])
        self.assertEqual(job_exec, res['job_execution'])
        self.assertEqual({}, res['oozie_params'])
Code example #16
File: test_job_manager.py  Project: stannie42/sahara
    def test_build_workflow_for_job_hive(self, job_binary):

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE)
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        self.assertIn("""
      <job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <script>script.q</script>
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)
Code example #17
File: test_job_manager.py  Project: stannie42/sahara
    def test_build_workflow_for_job_java(self):
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        configs = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Code example #18
File: test_oozie.py  Project: butterfy76/sahara
    def test_upload_workflow_file(self, remote_get):
        oje = FakeOozieJobEngine(u.create_cluster())
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote_get.return_value = remote_class
        res = oje._upload_workflow_file(remote_get, "test", "hadoop.xml",
                                        'hdfs')
        self.assertEqual("test/workflow.xml", res)
Code example #19
File: test_oozie.py  Project: gongwayne/Openstack
    def test_upload_workflow_file(self, remote_get):
        oje = FakeOozieJobEngine(u.create_cluster())
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote_get.return_value = remote_class
        res = oje._upload_workflow_file(remote_get, "test", "hadoop.xml",
                                        'hdfs')
        self.assertEqual("test/workflow.xml", res)
Code example #20
File: test_oozie.py  Project: gongwayne/Openstack
    def test_hdfs_create_workflow_dir(self, remote):
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        oje = FakeOozieJobEngine(u.create_cluster())
        job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        res = oje._create_hdfs_workflow_dir(mock.Mock(), job)
        self.assertIn('/user/hadoop/special_name/', res)
Code example #21
File: test_oozie.py  Project: butterfy76/sahara
    def test_hdfs_create_workflow_dir(self, remote):
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        oje = FakeOozieJobEngine(u.create_cluster())
        job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        res = oje._create_hdfs_workflow_dir(mock.Mock(), job)
        self.assertIn('/user/hadoop/special_name/', res)
Code example #22
File: test_job_manager.py  Project: thefuyang/sahara
    def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
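        # Note: edp_conf_mock presumably patches the config check that
        # enables the EDP adapter; when it returns True, the generated
        # workflow uses the Sahara MainWrapper as its <main-class> instead
        # of the user-supplied main class.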
        edp_conf_mock.return_value = True

        configs = {"configs": {"edp.java.main_class": "some_main"}}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(), job_exec.job_configs)

        self.assertIn("<main-class>org.openstack.sahara.edp.MainWrapper</main-class>", res)
        self.assertNotIn("some_main", res)
Code example #23
File: test_oozie.py  Project: butterfy76/sahara
    def test_add_postfix(self):
        oje = FakeOozieJobEngine(u.create_cluster())

        self.override_config("job_workflow_postfix", 'caba')
        res = oje._add_postfix('aba')
        self.assertEqual("aba/caba/", res)

        self.override_config("job_workflow_postfix", '')
        res = oje._add_postfix('aba')
        self.assertEqual("aba/", res)
Code example #24
File: test_job_manager.py  Project: thefuyang/sahara
    def test_run_job_unsupported_type(self, cluster_get, job_exec_get, job_get):
        job, job_exec = u.create_job_exec("unsupported_type")
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = cluster
        with testtools.ExpectedException(ex.EDPError):
            job_manager._run_job(job_exec.id)
Code example #25
File: test_oozie.py  Project: gongwayne/Openstack
    def test_add_postfix(self):
        oje = FakeOozieJobEngine(u.create_cluster())

        self.override_config("job_workflow_postfix", 'caba')
        res = oje._add_postfix('aba')
        self.assertEqual("aba/caba/", res)

        self.override_config("job_workflow_postfix", '')
        res = oje._add_postfix('aba')
        self.assertEqual("aba/", res)
Code example #26
    def test_run_job_unsupported_type(self,
                                      cluster_get, job_exec_get, job_get):
        job, job_exec = u.create_job_exec("unsupported_type")
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = cluster
        with testtools.ExpectedException(ex.EDPError):
            job_manager._run_job(job_exec.id)
Code example #27
File: test_job_manager.py  Project: thefuyang/sahara
    def test_build_workflow_for_job_shell(self):
        configs = {"configs": {"k1": "v1"}, "params": {"p1": "v1"}, "args": ["a1", "a2"]}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, configs)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(), job_exec.job_configs)

        self.assertIn("<name>k1</name>", res)
        self.assertIn("<value>v1</value>", res)

        self.assertIn("<env-var>p1=v1</env-var>", res)

        self.assertIn("<argument>a1</argument>", res)
        self.assertIn("<argument>a2</argument>", res)
Code example #28
File: test_job_manager.py  Project: wuhsh/sahara
    def test_suspend_unsuspendible_job(self, suspend_job_get, cluster_get,
                                       job_exec_get, job_get):
        info = {'status': edp.JOB_STATUS_SUCCEEDED}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = "Active"
        cluster_get.return_value = cluster

        self.assertEqual(0, suspend_job_get.call_count)
Code example #29
    def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
        edp_conf_mock.return_value = True

        configs = {"configs": {"edp.java.main_class": "some_main"}}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        self.assertIn(
            "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>",
            res)
        self.assertNotIn("some_main", res)
Code example #30
File: test_oozie.py  Project: openstack/sahara
    def test_get_job_status(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        client_class = mock.MagicMock()
        client_class.add_job = mock.MagicMock(return_value=1)
        client_class.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})
        oje.get_client = mock.MagicMock(return_value=client_class)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        self.assertIsNone(oje.get_job_status(job_exec))

        job_exec.engine_job_id = 1
        self.assertEqual({'status': 'PENDING'}, oje.get_job_status(job_exec))
Code example #31
    def test_get_job_status(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        client_class = mock.MagicMock()
        client_class.add_job = mock.MagicMock(return_value=1)
        client_class.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})
        oje.get_client = mock.MagicMock(return_value=client_class)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        self.assertIsNone(oje.get_job_status(job_exec))

        job_exec.engine_job_id = 1
        self.assertEqual({'status': 'PENDING'}, oje.get_job_status(job_exec))
Code example #32
File: test_job_manager.py  Project: thefuyang/sahara
    def test_scheduled_edp_job_run(self, job_exec_get, cluster_get, job_get, run_scheduled_job):
        configs = {"job_execution_info": {"job_execution_type": "scheduled", "start": "2015-5-15T01:00Z"}}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = "Active"
        cluster_get.return_value = cluster

        job_manager._run_job(job_exec.id)

        self.assertEqual(1, run_scheduled_job.call_count)
Code example #33
File: test_oozie.py  Project: gongwayne/Openstack
    def test_cancel_job(self, kill_get, info_get):
        info_get.return_value = {}
        oje = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)

        # test cancel job without engine_job_id
        job_exec.engine_job_id = None
        oje.cancel_job(job_exec)
        self.assertEqual(0, kill_get.call_count)

        # test cancel job with engine_job_id
        job_exec.engine_job_id = 123
        oje.cancel_job(job_exec)
        self.assertEqual(1, kill_get.call_count)
Code example #34
File: test_oozie.py  Project: butterfy76/sahara
    def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
                                   remote):
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class
        conductor_raw_data.return_value = 'ok'

        oje = FakeOozieJobEngine(u.create_cluster())
        job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
        self.assertEqual(['job_prefix/script.pig'], res)

        job, _ = u.create_job_exec(edp.JOB_TYPE_MAPREDUCE)
        res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
        self.assertEqual(['job_prefix/lib/main.jar'], res)
Code example #35
File: test_oozie.py  Project: gongwayne/Openstack
    def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
                                   remote):
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class
        conductor_raw_data.return_value = 'ok'

        oje = FakeOozieJobEngine(u.create_cluster())
        job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
        self.assertEqual(['job_prefix/script.pig'], res)

        job, _ = u.create_job_exec(edp.JOB_TYPE_MAPREDUCE)
        res = oje._upload_job_files_to_hdfs(mock.Mock(), 'job_prefix', job, {})
        self.assertEqual(['job_prefix/lib/main.jar'], res)
Code example #36
File: test_oozie.py  Project: butterfy76/sahara
    def test_cancel_job(self, kill_get, info_get):
        info_get.return_value = {}
        oje = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)

        # test cancel job without engine_job_id
        job_exec.engine_job_id = None
        oje.cancel_job(job_exec)
        self.assertEqual(0, kill_get.call_count)

        # test cancel job with engine_job_id
        job_exec.engine_job_id = 123
        oje.cancel_job(job_exec)
        self.assertEqual(1, kill_get.call_count)
Code example #37
File: test_job_manager.py  Project: Imperat/sahara
    def test_suspend_unsuspendible_job(self, suspend_job_get,
                                       cluster_get, job_exec_get, job_get):
        info = {
            'status': edp.JOB_STATUS_SUCCEEDED
        }
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = "Active"
        cluster_get.return_value = cluster

        self.assertEqual(0, suspend_job_get.call_count)
Code example #38
    def test_build_workflow_for_job_shell(self):
        configs = {"configs": {"k1": "v1"},
                   "params": {"p1": "v1"},
                   "args": ["a1", "a2"]}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        self.assertIn("<name>k1</name>", res)
        self.assertIn("<value>v1</value>", res)

        self.assertIn("<env-var>p1=v1</env-var>", res)

        self.assertIn("<argument>a1</argument>", res)
        self.assertIn("<argument>a2</argument>", res)
Code example #39
File: test_job_manager.py  Project: stannie42/sahara
    def _build_workflow_common(self, job_type, streaming=False):
        if streaming:
            configs = {'edp.streaming.mapper': '/usr/bin/cat',
                       'edp.streaming.reducer': '/usr/bin/wc'}
            configs = {'configs': configs}
        else:
            configs = {}

        job, job_exec = u.create_job_exec(job_type, configs)

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        if streaming:
            self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
Code example #40
File: test_job_manager.py  Project: thefuyang/sahara
    def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get, job_get, job_execution_update_get):
        info = {"status": edp.JOB_STATUS_RUNNING}

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = cluster

        time_get.return_value = 10000

        job_execution_update_get.return_value = job_exec

        with testtools.ExpectedException(ex.CancelingFailed):
            job_manager.cancel_job(job_exec.id)
Code example #41
File: test_oozie.py  Project: KernelPryanic/sahara
    def test_get_oozie_job_params(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        oozie_params = {
            'oozie.libpath': '/mylibpath',
            'oozie.wf.application.path': '/wrong'
        }
        job_params = oje._get_oozie_job_params('hadoop', '/tmp', oozie_params)
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])
        self.assertEqual('hdfs://localhost:8020/tmp',
                         job_params['oozie.wf.application.path'])
        self.assertEqual('/mylibpath', job_params['oozie.libpath'])

        # Make sure this doesn't raise an exception
        job_params = oje._get_oozie_job_params('hadoop', '/tmp', {})
        self.assertNotIn('oozie.libpath', job_params)
Code example #42
File: test_job_manager.py  Project: wuhsh/sahara
    def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get,
                                  job_get, job_execution_update_get):
        info = {'status': edp.JOB_STATUS_RUNNING}

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, None, False, info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = cluster

        time_get.return_value = 10000

        job_execution_update_get.return_value = job_exec

        with testtools.ExpectedException(ex.CancelingFailed):
            job_manager.cancel_job(job_exec.id)
Code example #43
    def test_scheduled_edp_job_run(self, job_exec_get, cluster_get,
                                   job_get, run_scheduled_job):
        configs = {
            'job_execution_info': {
                'job_execution_type': 'scheduled',
                'start': '2015-5-15T01:00Z'
            }
        }
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        cluster = u.create_cluster()
        cluster.status = "Active"
        cluster_get.return_value = cluster

        job_manager._run_job(job_exec.id)

        self.assertEqual(1, run_scheduled_job.call_count)
Code example #44
File: test_oozie.py  Project: crobby/sahara
    def test_get_oozie_job_params(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        oozie_params = {'oozie.libpath': '/mylibpath',
                        'oozie.wf.application.path': '/wrong'}
        job_params = oje._get_oozie_job_params('hadoop',
                                               '/tmp', oozie_params, True)
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])
        self.assertEqual('hdfs://localhost:8020/tmp',
                         job_params['oozie.wf.application.path'])
        self.assertEqual("/mylibpath,hdfs://localhost:8020/user/"
                         "sahara-hbase-lib", job_params['oozie.libpath'])

        # Make sure this doesn't raise an exception
        job_params = oje._get_oozie_job_params('hadoop',
                                               '/tmp', {}, True)
        self.assertEqual("hdfs://localhost:8020/user/"
                         "sahara-hbase-lib", job_params['oozie.libpath'])
Code example #45
File: test_oozie.py  Project: ekasitk/sahara
    def test__resolve_external_hdfs_urls(self):

        oje = FakeOozieJobEngine(u.create_cluster())
        job_configs = {
            "configs": {
                "mapred.map.tasks": "1",
                "hdfs1": "hdfs://localhost/hdfs1"},
            "args": ["hdfs://localhost/hdfs3", "10"],
            "params": {
                "param1": "10",
                "param2": "hdfs://localhost/hdfs2"
            }
        }

        expected_external_hdfs_urls = ['hdfs://localhost/hdfs1',
                                       'hdfs://localhost/hdfs2',
                                       'hdfs://localhost/hdfs3']

        external_hdfs_urls = oje._resolve_external_hdfs_urls(job_configs)

        self.assertEqual(expected_external_hdfs_urls, external_hdfs_urls)
Code example #46
File: test_oozie.py  Project: gongwayne/Openstack
    def test__resolve_external_hdfs_urls(self):

        oje = FakeOozieJobEngine(u.create_cluster())
        job_configs = {
            "configs": {
                "mapred.map.tasks": "1",
                "hdfs1": "hdfs://localhost/hdfs1"},
            "args": ["hdfs://localhost/hdfs3", "10"],
            "params": {
                "param1": "10",
                "param2": "hdfs://localhost/hdfs2"
            }
        }

        expected_external_hdfs_urls = ['hdfs://localhost/hdfs1',
                                       'hdfs://localhost/hdfs2',
                                       'hdfs://localhost/hdfs3']

        external_hdfs_urls = oje._resolve_external_hdfs_urls(job_configs)

        self.assertEqual(expected_external_hdfs_urls, external_hdfs_urls)
Code example #47
File: test_job_manager.py  Project: degorenko/sahara
    def test_build_workflow_for_job_java(self):
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        configs = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        configs = {
            'configs': {},
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                          proxy=True)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Code example #48
File: test_job_manager.py  Project: degorenko/sahara
    def test_get_plugin(self):
        plugin = job_utils.get_plugin(u.create_cluster())
        self.assertEqual("vanilla", plugin.name)
Code example #49
File: test_swift_type.py  Project: wuhsh/sahara
    def test_prepare_cluster(self, ctx):
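        # Note: ctx is presumably a mock injected by a @mock.patch decorator
        # omitted from this snippet; the test only needs it to return a
        # dummy context value.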

        ctx.return_value = 'dummy'

        ds_url = "swift://container/input"
        ds = u.create_data_source(ds_url,
                                  name="data_source",
                                  id=uuidutils.generate_uuid())

        job_configs = {
            'configs': {
                job_utils.DATA_SOURCE_SUBST_NAME: True,
                job_utils.DATA_SOURCE_SUBST_UUID: True
            }
        }

        old_configs = copy.deepcopy(job_configs)

        self.s_type.prepare_cluster(ds,
                                    u.create_cluster(),
                                    job_configs=job_configs)
        # Swift configs should be filled in since they were blank
        self.assertEqual(
            ds.credentials['user'],
            job_configs['configs']['fs.swift.service.sahara.username'])
        self.assertEqual(
            ds.credentials['password'],
            job_configs['configs']['fs.swift.service.sahara.password'])

        self.assertNotEqual(old_configs, job_configs)

        job_configs['configs'] = {
            'fs.swift.service.sahara.username': '******',
            'fs.swift.service.sahara.password': '******',
            job_utils.DATA_SOURCE_SUBST_NAME: False,
            job_utils.DATA_SOURCE_SUBST_UUID: True
        }
        old_configs = copy.deepcopy(job_configs)

        self.s_type.prepare_cluster(ds,
                                    u.create_cluster(),
                                    job_configs=job_configs)
        # Swift configs should not be overwritten
        self.assertEqual(old_configs['configs'], job_configs['configs'])

        job_configs['configs'] = {
            job_utils.DATA_SOURCE_SUBST_NAME: True,
            job_utils.DATA_SOURCE_SUBST_UUID: False
        }
        job_configs['proxy_configs'] = {
            'proxy_username': '******',
            'proxy_password': '******',
            'proxy_trust_id': 'trustme'
        }
        old_configs = copy.deepcopy(job_configs)
        self.s_type.prepare_cluster(ds,
                                    u.create_cluster(),
                                    job_configs=job_configs)

        # Swift configs should be empty and proxy configs should be preserved
        self.assertEqual(old_configs['configs'], job_configs['configs'])
        self.assertEqual(old_configs['proxy_configs'],
                         job_configs['proxy_configs'])

        # If there's no configs do nothing
        job_configs['configs'] = None
        old_configs = copy.deepcopy(job_configs)

        self.s_type.prepare_cluster(ds,
                                    u.create_cluster(),
                                    job_configs=job_configs)
        self.assertEqual(old_configs, job_configs)

        # If it's a FrozenDict do nothing
        job_configs = {
            'configs': {
                job_utils.DATA_SOURCE_SUBST_NAME: True,
                job_utils.DATA_SOURCE_SUBST_UUID: True
            }
        }

        old_configs = copy.deepcopy(job_configs)
        job_configs = FrozenDict(job_configs)

        self.s_type.prepare_cluster(ds,
                                    u.create_cluster(),
                                    job_configs=job_configs)
        self.assertEqual(old_configs, job_configs)
Code example #50
File: test_job_manager.py  Project: degorenko/sahara
    def _build_workflow_common(self, job_type, streaming=False, proxy=False):
        if streaming:
            configs = {'edp.streaming.mapper': '/usr/bin/cat',
                       'edp.streaming.reducer': '/usr/bin/wc'}
            configs = {'configs': configs}
        else:
            configs = {}

        job, job_exec = u.create_job_exec(job_type, configs)

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec, input_data, output_data,
            'hadoop')

        if streaming:
            self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        if not proxy:
            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
        else:
            # testing workflow creation with a proxy domain
            self.override_config('use_domain_for_proxy_users', True)
            self.override_config("proxy_user_domain_name",
                                 'sahara_proxy_domain')
            job, job_exec = u.create_job_exec(job_type, proxy=True)

            res = workflow_factory.get_workflow_xml(
                job, u.create_cluster(), job_exec, input_data, output_data,
                'hadoop')

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)
Code example #51
File: test_oozie.py  Project: suriya2612/sahara
    def test_get_oozie_job_params(self):
        oje = FakeOozieJobEngine(u.create_cluster())
        job_params = oje._get_oozie_job_params('hadoop', '/tmp')
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])
Code example #52
    def test_build_workflow_swift_configs(self, job_binary):

        # Test that swift configs come from either input or output data sources
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('hdfs://user/hadoop/out')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        input_data = u.create_data_source('hdfs://user/hadoop/in')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
        input_data = u.create_data_source('hdfs://user/hadoop/in')
        output_data = u.create_data_source('hdfs://user/hadoop/out')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)
Code example #53
    def _build_workflow_common(self, job_type, streaming=False, proxy=False):
        if streaming:
            configs = {'edp.streaming.mapper': '/usr/bin/cat',
                       'edp.streaming.reducer': '/usr/bin/wc'}
            configs = {'configs': configs}
        else:
            configs = {}

        job, job_exec = u.create_job_exec(job_type, configs)

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        if streaming:
            self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        if not proxy:
            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
        else:
            # testing workflow creation with a proxy domain
            self.override_config('use_domain_for_proxy_users', True)
            self.override_config("proxy_user_domain_name",
                                 'sahara_proxy_domain')
            job, job_exec = u.create_job_exec(job_type, proxy=True)

            res = workflow_factory.get_workflow_xml(
                job, u.create_cluster(), job_exec.job_configs,
                input_data, output_data, 'hadoop', data_source_urls)

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)
Code example #54
    def test_build_workflow_for_job_java(self):
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        configs = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        configs = {
            'configs': {},
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                          proxy=True)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec.job_configs)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Code example #55
    def test_get_plugin(self):
        plugin = job_utils.get_plugin(u.create_cluster())
        self.assertEqual("vanilla", plugin.name)