Beispiel #1
0
    def test_build_workflow_for_job_pig(self, job_binary):
        """Verify the workflow XML generated for a Pig job.

        Both data sources are swift URLs. The XML is generated once with
        empty job configs and once for a proxy job execution after the
        proxy-domain options have been overridden.
        """
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {source.id: source.url
                            for source in (input_data, output_data)}

        expected_params = """
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>"""

        expected_credentials = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>"""

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn(expected_params, workflow)
        self.assertIn(expected_credentials, workflow)
        self.assertIn("<script>script.pig</script>", workflow)

        # Second pass: workflow creation with a proxy domain.
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

        expected_proxy_configuration = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>"""

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        self.assertIn(expected_proxy_configuration, workflow)
Beispiel #2
0
    def test_build_workflow_swift_configs(self, job_binary):
        """Swift configs come from either the input or output data source.

        Three combinations are exercised: swift input with HDFS output,
        HDFS input with swift output, and HDFS on both sides together
        with a user-supplied config value.
        """
        def _build(job, job_exec, source, sink):
            # Every scenario uses a fresh cluster and the 'hadoop' user.
            return workflow_factory.get_workflow_xml(
                job, u.create_cluster(), job_exec, source, sink, 'hadoop')

        # The same credentials block is expected for both swift scenarios.
        swift_credentials = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>"""

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        # Swift input, HDFS output.
        source = u.create_data_source('swift://ex/i')
        sink = u.create_data_source('hdfs://user/hadoop/out')
        self.assertIn(swift_credentials, _build(job, job_exec, source, sink))

        # HDFS input, swift output.
        source = u.create_data_source('hdfs://user/hadoop/in')
        sink = u.create_data_source('swift://ex/o')
        self.assertIn(swift_credentials, _build(job, job_exec, source, sink))

        # HDFS on both sides with one user-supplied config value.
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
        source = u.create_data_source('hdfs://user/hadoop/in')
        sink = u.create_data_source('hdfs://user/hadoop/out')

        user_configuration = """
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>"""
        self.assertIn(user_configuration, _build(job, job_exec, source, sink))
Beispiel #3
0
    def test_build_workflow_for_job_hive(self, job_binary):
        """Verify the parsed workflow XML generated for a Hive job.

        The generated document is parsed and the first <hive> element is
        inspected for its job-xml, configuration properties, script and
        params. A second pass repeats the property check for a proxy job
        execution with the proxy-domain options overridden.
        """
        def _first_hive_node(workflow_xml):
            document = xml.parseString(workflow_xml)
            return document.getElementsByTagName('hive')[0]

        def _properties_of(hive_node):
            config_nodes = hive_node.getElementsByTagName('configuration')
            return xmlutils.get_property_dict(config_nodes[0])

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {source.id: source.url
                            for source in (input_data, output_data)}

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        hive = _first_hive_node(workflow)
        self.assertEqual('/user/hadoop/conf/hive-site.xml',
                         xmlutils.get_text_from_node(hive, 'job-xml'))

        expected_properties = {
            'fs.swift.service.sahara.password': '******',
            'fs.swift.service.sahara.username': '******',
        }
        self.assertEqual(expected_properties, _properties_of(hive))

        self.assertEqual('script.q',
                         xmlutils.get_text_from_node(hive, 'script'))

        expected_params = {'INPUT': 'swift://ex.sahara/i',
                           'OUTPUT': 'swift://ex.sahara/o'}
        self.assertEqual(expected_params, xmlutils.get_param_dict(hive))

        # Second pass: workflow creation with a proxy domain.
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        expected_proxy_properties = {
            'fs.swift.service.sahara.domain.name': 'sahara_proxy_domain',
            'fs.swift.service.sahara.trust.id':
                '0123456789abcdef0123456789abcdef',
            'fs.swift.service.sahara.password': '******',
            'fs.swift.service.sahara.username': '******',
        }
        self.assertEqual(expected_proxy_properties,
                         _properties_of(_first_hive_node(workflow)))
Beispiel #4
0
    def test_build_workflow_for_job_hive(self, job_binary):
        """Check the <hive> element of the workflow XML for a Hive job.

        Covers the job-xml path, swift credential properties, script
        name and INPUT/OUTPUT params, then the properties produced for a
        proxy job execution once the proxy-domain options are set.
        """
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        swift_in = u.create_data_source("swift://ex/i")
        swift_out = u.create_data_source("swift://ex/o")
        urls_by_id = {
            swift_in.id: swift_in.url,
            swift_out.id: swift_out.url,
        }

        workflow_xml = workflow_factory.get_workflow_xml(
            job,
            u.create_cluster(),
            job_exec.job_configs,
            swift_in,
            swift_out,
            "hadoop",
            urls_by_id,
        )

        hive_node = xml.parseString(workflow_xml).getElementsByTagName(
            "hive")[0]
        job_xml_path = xmlutils.get_text_from_node(hive_node, "job-xml")
        self.assertEqual("/user/hadoop/conf/hive-site.xml", job_xml_path)

        config_nodes = hive_node.getElementsByTagName("configuration")
        found_properties = xmlutils.get_property_dict(config_nodes[0])
        self.assertEqual(
            {
                "fs.swift.service.sahara.password": "******",
                "fs.swift.service.sahara.username": "******",
            },
            found_properties,
        )

        script_name = xmlutils.get_text_from_node(hive_node, "script")
        self.assertEqual("script.q", script_name)

        found_params = xmlutils.get_param_dict(hive_node)
        self.assertEqual(
            {"INPUT": "swift://ex.sahara/i", "OUTPUT": "swift://ex.sahara/o"},
            found_params,
        )

        # Repeat the workflow creation with a proxy domain configured.
        self.override_config("use_domain_for_proxy_users", True)
        self.override_config("proxy_user_domain_name", "sahara_proxy_domain")

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        workflow_xml = workflow_factory.get_workflow_xml(
            job,
            u.create_cluster(),
            job_exec.job_configs,
            swift_in,
            swift_out,
            "hadoop",
            urls_by_id,
        )

        hive_node = xml.parseString(workflow_xml).getElementsByTagName(
            "hive")[0]
        config_nodes = hive_node.getElementsByTagName("configuration")
        found_properties = xmlutils.get_property_dict(config_nodes[0])
        expected_proxy_properties = {
            "fs.swift.service.sahara.domain.name": "sahara_proxy_domain",
            "fs.swift.service.sahara.trust.id":
                "0123456789abcdef0123456789abcdef",
            "fs.swift.service.sahara.password": "******",
            "fs.swift.service.sahara.username": "******",
        }
        self.assertEqual(expected_proxy_properties, found_properties)
Beispiel #5
0
    def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
                                   remote):
        """Check the paths returned by _upload_job_files_to_hdfs.

        A Pig job is expected to yield its script path and a MapReduce
        job its main jar under the ``lib`` directory.
        """
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class
        conductor_raw_data.return_value = 'ok'

        engine = FakeOozieJobEngine(u.create_cluster())

        pig_job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        uploaded = engine._upload_job_files_to_hdfs(
            mock.Mock(), 'job_prefix', pig_job, {})
        self.assertEqual(['job_prefix/script.pig'], uploaded)

        mapreduce_job, _ = u.create_job_exec(edp.JOB_TYPE_MAPREDUCE)
        uploaded = engine._upload_job_files_to_hdfs(
            mock.Mock(), 'job_prefix', mapreduce_job, {})
        self.assertEqual(['job_prefix/lib/main.jar'], uploaded)
Beispiel #6
0
    def test_prepare_run_job(self, job, data_source, update,
                             remote, wf_factory, get_ds_urls,
                             prepare_cluster):
        """Check the dictionary returned by _prepare_run_job.

        All collaborators are mocked; the assertions cover the
        'context', 'hdfs_user', 'job_execution' and 'oozie_params' keys.
        """
        wf_factory.return_value = mock.MagicMock()

        fake_remote = mock.MagicMock()
        fake_remote.__exit__.return_value = 'closed'
        remote.return_value = fake_remote

        fake_job = mock.MagicMock()
        fake_job.name = "myJob"
        job.return_value = fake_job

        fake_source = mock.MagicMock()
        fake_source.url = "localhost"

        get_ds_urls.return_value = ('url', 'url')

        data_source.return_value = fake_source
        engine = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        prepared = engine._prepare_run_job(job_exec)
        self.assertEqual(ctx.ctx(), prepared['context'])
        self.assertEqual('hadoop', prepared['hdfs_user'])
        self.assertEqual(job_exec, prepared['job_execution'])
        self.assertEqual({}, prepared['oozie_params'])
Beispiel #7
0
    def test_job_type_supported(self, job_get):
        """An engine is returned for a Pig job but not for an unknown type."""
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_get.return_value = job

        engine = job_manager._get_job_engine(u.create_cluster(), job_exec)
        self.assertIsNotNone(engine)

        # Mutate the same job object that the patched getter hands back.
        job.type = "unsupported_type"
        engine = job_manager._get_job_engine(u.create_cluster(), job_exec)
        self.assertIsNone(engine)
Beispiel #8
0
    def test_build_workflow_for_job_java(self):
        """Verify the workflow XML generated for a Java job.

        If args include swift paths, user and password values have to be
        supplied via configs instead of being lifted from input or
        output data sources.
        """
        swift_credentials = {sw.HADOOP_SWIFT_USERNAME: '******',
                             sw.HADOOP_SWIFT_PASSWORD: '******'}
        job_configs = {'configs': swift_credentials,
                       'args': ['swift://ex/i', 'output_path']}

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec)

        expected_template = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>"""
        expected_fragment = expected_template % (_java_main_class, _java_opts)

        self.assertIn(expected_fragment, workflow)
Beispiel #9
0
    def test_build_workflow_for_job_hive(self, job_binary):
        """Verify the workflow XML fragment generated for a Hive job.

        The fragment covers the job-xml path, swift credentials, script
        name and the INPUT/OUTPUT params in their expected order.
        """
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE)
        job_binary.return_value = {"name": "script.q"}

        swift_in = u.create_data_source('swift://ex/i')
        swift_out = u.create_data_source('swift://ex/o')

        expected_fragment = """
      <job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <script>script.q</script>
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>"""

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec,
            swift_in, swift_out, 'hadoop')

        self.assertIn(expected_fragment, workflow)
Beispiel #10
0
    def test_run_job(self, exec_get, job, data_source,
                     update, remote, wf_factory, get_ds_urls,
                     prepare_cluster):
        """Check the tuple returned by run_job with all collaborators mocked.

        The engine's client is replaced by a mock whose ``add_job``
        returns 1 and whose ``get_job_info`` reports a PENDING status;
        ``run_job`` is then expected to return ``(1, 'PENDING', None)``.
        """
        wf_factory.return_value = mock.MagicMock()
        remote_class = mock.MagicMock()
        remote_class.__exit__.return_value = 'closed'
        remote.return_value = remote_class

        # The name must be set on the job object the patched getter
        # returns, not on the patch itself; this mirrors the setup used
        # in test_prepare_run_job.
        job_class = mock.MagicMock()
        job_class.name = "myJob"
        job.return_value = job_class

        source = mock.MagicMock()
        source.url = "localhost"
        data_source.return_value = source

        get_ds_urls.return_value = ('url', 'url')

        oje = FakeOozieJobEngine(u.create_cluster())
        client_class = mock.MagicMock()
        client_class.add_job = mock.MagicMock(return_value=1)
        client_class.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})
        oje.get_client = mock.MagicMock(return_value=client_class)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        update.return_value = job_exec

        self.assertEqual((1, 'PENDING', None), oje.run_job(job_exec))
Beispiel #11
0
    def _build_workflow_with_conf_common(self, job_type):
        """Assert user config and swift dirs appear in the workflow XML.

        :param job_type: job type passed through to u.create_job_exec
        """
        swift_in = u.create_data_source('swift://ex/i')
        swift_out = u.create_data_source('swift://ex/o')

        job, job_exec = u.create_job_exec(
            job_type, configs={"configs": {'c': 'f'}})

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec,
            swift_in, swift_out, 'hadoop')

        # Checked in order: user config, input directory, output directory.
        expected_properties = (
            """
        <property>
          <name>c</name>
          <value>f</value>
        </property>""",
            """
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""",
            """
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""",
        )
        for expected in expected_properties:
            self.assertIn(expected, workflow)
Beispiel #12
0
    def test_run_job_handles_exceptions(self, runjob, job_ex_upd):
        """A failing run marks the job execution as failed exactly once."""
        runjob.side_effect = ex.SwiftClientException("Unauthorised")
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_manager.run_job(job_exec.id)

        self.assertEqual(1, job_ex_upd.call_count)

        # The update values are the third positional argument of the call.
        update_values = job_ex_upd.call_args[0][2]
        self.assertEqual(edp.JOB_STATUS_FAILED,
                         update_values["info"]["status"])
Beispiel #13
0
    def test_hdfs_create_workflow_dir(self, remote):
        """The created workflow dir contains '/user/hadoop/special_name/'."""
        fake_remote = mock.MagicMock()
        fake_remote.__exit__.return_value = 'closed'
        remote.return_value = fake_remote

        engine = FakeOozieJobEngine(u.create_cluster())
        job, _ = u.create_job_exec(edp.JOB_TYPE_PIG)
        workflow_dir = engine._create_hdfs_workflow_dir(mock.Mock(), job)
        self.assertIn('/user/hadoop/special_name/', workflow_dir)
Beispiel #14
0
    def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
        """With the patched EDP config check on, MainWrapper is the main class.

        The user-supplied main class must not appear in the XML at all.
        """
        edp_conf_mock.return_value = True

        job_configs = {"configs": {"edp.java.main_class": "some_main"}}
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        wrapper_tag = (
            "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>")
        self.assertIn(wrapper_tag, workflow)
        self.assertNotIn("some_main", workflow)
Beispiel #15
0
    def test_run_job_unsupported_type(self, cluster_get, job_exec_get, job_get):
        """_run_job raises EDPError for a job of an unsupported type."""
        job, job_exec = u.create_job_exec("unsupported_type")
        job_get.return_value = job
        job_exec_get.return_value = job_exec

        active_cluster = u.create_cluster()
        active_cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = active_cluster

        with testtools.ExpectedException(ex.EDPError):
            job_manager._run_job(job_exec.id)
Beispiel #16
0
    def test_run_job_unsupported_type(self,
                                      cluster_get, job_exec_get, job_get):
        """Running a job whose type is unsupported raises EDPError."""
        # The cluster is put in the active state before the run.
        fake_cluster = u.create_cluster()
        fake_cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = fake_cluster

        job, job_exec = u.create_job_exec("unsupported_type")
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        with testtools.ExpectedException(ex.EDPError):
            job_manager._run_job(job_exec.id)
Beispiel #17
0
    def test_build_workflow_for_job_shell(self):
        """Configs, params and args of a shell job all reach the XML."""
        job_configs = {
            "configs": {"k1": "v1"},
            "params": {"p1": "v1"},
            "args": ["a1", "a2"],
        }
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, job_configs)
        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        # Checked in order: config name and value, env var, both arguments.
        expected_fragments = (
            "<name>k1</name>",
            "<value>v1</value>",
            "<env-var>p1=v1</env-var>",
            "<argument>a1</argument>",
            "<argument>a2</argument>",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, workflow)
Beispiel #18
0
    def test_suspend_unsuspendible_job(self, suspend_job_get, cluster_get,
                                       job_exec_get, job_get):
        """Set up a succeeded job and check that suspend was not called."""
        succeeded_info = {'status': edp.JOB_STATUS_SUCCEEDED}
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, None, False, succeeded_info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        active_cluster = u.create_cluster()
        active_cluster.status = "Active"
        cluster_get.return_value = active_cluster

        # NOTE(review): nothing in this test invokes a suspend operation,
        # so this only confirms the mock starts out uncalled - confirm
        # whether a call to the job manager is missing here.
        self.assertEqual(0, suspend_job_get.call_count)
Beispiel #19
0
    def test_build_workflow_for_job_java_with_adapter(self, edp_conf_mock):
        """The wrapper class replaces the configured Java main class.

        The patched EDP config check is forced to return True first.
        """
        edp_conf_mock.return_value = True

        main_class_config = {"edp.java.main_class": "some_main"}
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_JAVA, {"configs": main_class_config})
        generated_xml = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        self.assertIn(
            "<main-class>org.openstack.sahara.edp.MainWrapper</main-class>",
            generated_xml)
        self.assertNotIn("some_main", generated_xml)
Beispiel #20
0
    def test_get_job_status(self):
        """Status is None without an engine job id, else the client's info."""
        engine = FakeOozieJobEngine(u.create_cluster())

        pending_info = {'status': 'PENDING'}
        fake_client = mock.MagicMock()
        fake_client.add_job = mock.MagicMock(return_value=1)
        fake_client.get_job_info = mock.MagicMock(return_value=pending_info)
        engine.get_client = mock.MagicMock(return_value=fake_client)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        self.assertIsNone(engine.get_job_status(job_exec))

        job_exec.engine_job_id = 1
        self.assertEqual({'status': 'PENDING'},
                         engine.get_job_status(job_exec))
Beispiel #21
0
    def test_scheduled_edp_job_run(self, job_exec_get, cluster_get, job_get, run_scheduled_job):
        """A 'scheduled' job execution triggers one scheduled-run call."""
        execution_info = {
            "job_execution_type": "scheduled",
            "start": "2015-5-15T01:00Z",
        }
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, {"job_execution_info": execution_info})
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        active_cluster = u.create_cluster()
        active_cluster.status = "Active"
        cluster_get.return_value = active_cluster

        job_manager._run_job(job_exec.id)

        self.assertEqual(1, run_scheduled_job.call_count)
Beispiel #22
0
    def test_get_job_status(self):
        """get_job_status needs an engine job id before it reports info."""
        fake_client = mock.MagicMock()
        fake_client.add_job = mock.MagicMock(return_value=1)
        fake_client.get_job_info = mock.MagicMock(
            return_value={'status': 'PENDING'})

        engine = FakeOozieJobEngine(u.create_cluster())
        engine.get_client = mock.MagicMock(return_value=fake_client)

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        status_without_id = engine.get_job_status(job_exec)
        self.assertIsNone(status_without_id)

        job_exec.engine_job_id = 1
        status_with_id = engine.get_job_status(job_exec)
        self.assertEqual({'status': 'PENDING'}, status_with_id)
Beispiel #23
0
    def test_run_job_handles_exceptions_with_run_job(self, canceljob, runjob, job_ex_get, job_ex_upd):
        """A failed run with an engine job id is marked failed and cancelled."""
        runjob.side_effect = ex.OozieException("run_job failed")
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.engine_job_id = "fake_oozie_id"
        job_ex_get.return_value = job_exec

        job_manager.run_job(job_exec.id)

        self.assertEqual(1, job_ex_get.call_count)
        self.assertEqual(1, job_ex_upd.call_count)

        # The update values are the third positional argument of the call.
        update_values = job_ex_upd.call_args[0][2]
        self.assertEqual(edp.JOB_STATUS_FAILED,
                         update_values["info"]["status"])
        self.assertEqual(1, canceljob.call_count)
Beispiel #24
0
    def test_cancel_job(self, kill_get, info_get):
        """The kill mock is only called when an engine job id is set."""
        info_get.return_value = {}
        engine = FakeOozieJobEngine(u.create_cluster())
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)

        # Without an engine_job_id no kill call is expected.
        job_exec.engine_job_id = None
        engine.cancel_job(job_exec)
        kill_calls = kill_get.call_count
        self.assertEqual(0, kill_calls)

        # With an engine_job_id exactly one kill call is expected.
        job_exec.engine_job_id = 123
        engine.cancel_job(job_exec)
        kill_calls = kill_get.call_count
        self.assertEqual(1, kill_calls)
Beispiel #25
0
    def test_suspend_unsuspendible_job(self, suspend_job_get,
                                       cluster_get, job_exec_get, job_get):
        """Prepare a job in the succeeded state; suspend must stay uncalled."""
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, None, False,
            {'status': edp.JOB_STATUS_SUCCEEDED})
        job_get.return_value = job
        job_exec_get.return_value = job_exec

        fake_cluster = u.create_cluster()
        fake_cluster.status = "Active"
        cluster_get.return_value = fake_cluster

        # NOTE(review): no suspend operation is triggered above, so this
        # assertion only checks the initial state of the mock - verify
        # whether the call under test was dropped.
        self.assertEqual(0, suspend_job_get.call_count)
Beispiel #26
0
    def test_get_data_sources(self, ds):
        """Input and output sources are looked up by the job's source ids."""
        # Stand-in for the patched lookup: echoes the id back in the url.
        ds.side_effect = lambda ctx, id: mock.Mock(id=id, url="obj_" + id)

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.input_id = "s1"
        job_exec.output_id = "s2"

        sources = job_utils.get_data_sources(job_exec, job, {})
        input_source, output_source = sources

        self.assertEqual("obj_s1", input_source.url)
        self.assertEqual("obj_s2", output_source.url)
Beispiel #27
0
    def test_get_data_sources_with_null_id(self):
        """With input and output ids of None, both sources are None."""
        swift_credentials = {
            sw.HADOOP_SWIFT_USERNAME: "******",
            sw.HADOOP_SWIFT_PASSWORD: "******",
        }
        job_configs = {
            "configs": swift_credentials,
            "args": ["swift://ex/i", "output_path"],
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
        job_exec.input_id = None
        job_exec.output_id = None

        sources = job_utils.get_data_sources(job_exec, job, {})
        input_source, output_source = sources

        self.assertIsNone(input_source)
        self.assertIsNone(output_source)
Beispiel #28
0
    def test_cancel_job(self, kill_get, info_get):
        """cancel_job reaches the kill mock only once an engine id exists."""
        info_get.return_value = {}
        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        oozie_engine = FakeOozieJobEngine(u.create_cluster())

        # Cancelling with no engine_job_id: kill stays uncalled.
        job_exec.engine_job_id = None
        oozie_engine.cancel_job(job_exec)
        self.assertEqual(0, kill_get.call_count)

        # Cancelling with an engine_job_id: kill is called once.
        job_exec.engine_job_id = 123
        oozie_engine.cancel_job(job_exec)
        self.assertEqual(1, kill_get.call_count)
Beispiel #29
0
    def test_get_data_sources_java(self):
        """For this Java job, get_data_sources returns None for both sources.

        The job is created with swift credentials and args only.
        """
        configs = {
            sw.HADOOP_SWIFT_USERNAME: '******',
            sw.HADOOP_SWIFT_PASSWORD: '******'
        }

        configs = {'configs': configs, 'args': ['swift://ex/i', 'output_path']}

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)

        input_source, output_source = (job_utils.get_data_sources(
            job_exec, job))

        # assertIsNone checks identity, so an object that merely compares
        # equal to None cannot slip through.
        self.assertIsNone(input_source)
        self.assertIsNone(output_source)
Beispiel #30
0
    def test_get_input_output_data_sources(self, ds):
        """Input and output HDFS sources are resolved from the job's ids."""
        def _fake_data_source_get(ctx, id):
            # Echo the requested id back inside an hdfs url.
            return mock.Mock(id=id, url="hdfs://obj_" + id, type='hdfs')

        ds.side_effect = _fake_data_source_get

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.input_id = 's1'
        job_exec.output_id = 's2'

        sources = job_utils.get_input_output_data_sources(job_exec, job, {})
        input_source, output_source = sources

        self.assertEqual('hdfs://obj_s1', input_source.url)
        self.assertEqual('hdfs://obj_s2', output_source.url)
Beispiel #31
0
    def test_get_data_sources(self, ds):
        """get_data_sources returns the objects produced by the lookup."""
        def _fake_data_source_get(ctx, id):
            # The url records which id was requested.
            return mock.Mock(id=id, url="obj_" + id)

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.input_id, job_exec.output_id = 's1', 's2'

        ds.side_effect = _fake_data_source_get
        input_source, output_source = job_utils.get_data_sources(
            job_exec, job, {})

        self.assertEqual('obj_s1', input_source.url)
        self.assertEqual('obj_s2', output_source.url)
Beispiel #32
0
    def test_run_job_handles_exceptions_with_run_job(self, canceljob, runjob,
                                                     job_ex_get, job_ex_upd):
        """An OozieException from the run marks the job failed and cancels it.

        The job execution carries an engine job id, so exactly one
        cancel call is expected.
        """
        runjob.side_effect = ex.OozieException("run_job failed")

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.engine_job_id = "fake_oozie_id"
        job_ex_get.return_value = job_exec

        job_manager.run_job(job_exec.id)

        self.assertEqual(1, job_ex_get.call_count)
        self.assertEqual(1, job_ex_upd.call_count)

        update_args = job_ex_upd.call_args[0]
        reported_status = update_args[2]["info"]["status"]
        self.assertEqual(edp.JOB_STATUS_FAILED, reported_status)
        self.assertEqual(1, canceljob.call_count)
Beispiel #33
0
    def test_build_workflow_for_job_shell(self):
        """Shell job configs, params and args appear in the workflow XML."""
        shell_configs = {}
        shell_configs["configs"] = {"k1": "v1"}
        shell_configs["params"] = {"p1": "v1"}
        shell_configs["args"] = ["a1", "a2"]

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_SHELL, shell_configs)
        generated_xml = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        # Config entry.
        self.assertIn("<name>k1</name>", generated_xml)
        self.assertIn("<value>v1</value>", generated_xml)

        # Param exported as an environment variable.
        self.assertIn("<env-var>p1=v1</env-var>", generated_xml)

        # Positional arguments.
        self.assertIn("<argument>a1</argument>", generated_xml)
        self.assertIn("<argument>a2</argument>", generated_xml)
Beispiel #34
0
    def test_get_data_sources(self, ds):
        """get_data_sources returns whatever the patched lookup yields."""
        # Stand-in for the patched lookup: a string derived from the id.
        ds.side_effect = lambda ctx, id: "obj_" + id

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.input_id = 's1'
        job_exec.output_id = 's2'

        sources = job_utils.get_data_sources(job_exec, job)
        input_source, output_source = sources

        self.assertEqual('obj_s1', input_source)
        self.assertEqual('obj_s2', output_source)
Beispiel #35
0
    def test_run_job_handles_exceptions(self, canceljob, runjob,
                                        job_ex_get, job_ex_upd):
        """A failed run without an engine job id is failed but not cancelled."""
        runjob.side_effect = ex.SwiftClientException("Unauthorised")

        _, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
        job_exec.engine_job_id = None
        job_ex_get.return_value = job_exec

        job_manager.run_job(job_exec.id)

        self.assertEqual(1, job_ex_get.call_count)
        self.assertEqual(1, job_ex_upd.call_count)

        update_args = job_ex_upd.call_args[0]
        reported_status = update_args[2]["info"]["status"]
        self.assertEqual(edp.JOB_STATUS_FAILED, reported_status)
        self.assertEqual(0, canceljob.call_count)
Beispiel #36
0
    def _build_workflow_common(self, job_type, streaming=False):
        """Assert the common workflow XML pieces for the given job type.

        :param job_type: job type passed through to u.create_job_exec
        :param streaming: when True, streaming mapper/reducer configs are
            supplied and the <streaming> element is asserted as well
        """
        job_configs = {}
        if streaming:
            job_configs['configs'] = {
                'edp.streaming.mapper': '/usr/bin/cat',
                'edp.streaming.reducer': '/usr/bin/wc',
            }

        job, job_exec = u.create_job_exec(job_type, job_configs)

        swift_in = u.create_data_source('swift://ex/i')
        swift_out = u.create_data_source('swift://ex/o')

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec,
            swift_in, swift_out, 'hadoop')

        if streaming:
            expected_streaming = """
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>"""
            self.assertIn(expected_streaming, workflow)

        # Checked in order: output dir, input dir, password, username.
        expected_properties = (
            """
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""",
            """
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""",
            """
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""",
            """
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""",
        )
        for expected in expected_properties:
            self.assertIn(expected, workflow)
Beispiel #37
0
    def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get, job_get, job_execution_update_get):
        """cancel_job raises CancelingFailed for a job that stays running.

        The patched time source is pinned to a constant value and the
        job execution is always reported with a RUNNING status.
        """
        running_info = {"status": edp.JOB_STATUS_RUNNING}
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, None, False, running_info)
        job_exec_get.return_value = job_exec
        job_get.return_value = job

        active_cluster = u.create_cluster()
        active_cluster.status = c_u.CLUSTER_STATUS_ACTIVE
        cluster_get.return_value = active_cluster

        time_get.return_value = 10000
        job_execution_update_get.return_value = job_exec

        with testtools.ExpectedException(ex.CancelingFailed):
            job_manager.cancel_job(job_exec.id)
Beispiel #38
0
    def test_get_data_sources_java(self):
        """For this Java job, get_data_sources returns None for both sources.

        The job is created with swift credentials and args only.
        """
        configs = {sw.HADOOP_SWIFT_USERNAME: '******',
                   sw.HADOOP_SWIFT_PASSWORD: '******'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)

        input_source, output_source = (
            job_utils.get_data_sources(job_exec, job))

        # assertIsNone checks identity, so an object that merely compares
        # equal to None cannot slip through.
        self.assertIsNone(input_source)
        self.assertIsNone(output_source)
Beispiel #39
0
    def test_get_input_output_data_sources_with_null_id(self):
        """Both sources are None when the job execution has no source ids."""
        swift_credentials = {sw.HADOOP_SWIFT_USERNAME: '******',
                             sw.HADOOP_SWIFT_PASSWORD: '******'}
        job_configs = {
            'configs': swift_credentials,
            'args': ['hdfs://ex/i', 'output_path'],
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)
        job_exec.input_id = None
        job_exec.output_id = None

        sources = job_utils.get_input_output_data_sources(job_exec, job, {})
        input_source, output_source = sources

        self.assertIsNone(input_source)
        self.assertIsNone(output_source)
Beispiel #40
0
    def test_failed_to_cancel_job(self, time_get, cluster_get, job_exec_get,
                                  job_get, job_execution_update_get):
        """Expect CancelingFailed when cancelling a job reported as RUNNING.

        The arguments are presumably mocks supplied by patch decorators
        that are not visible here; the test only sets their return values.
        """
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, None, False,
            {'status': edp.JOB_STATUS_RUNNING})

        cluster = u.create_cluster()
        cluster.status = c_u.CLUSTER_STATUS_ACTIVE

        # Canned results for each patched call.
        job_get.return_value = job
        cluster_get.return_value = cluster
        time_get.return_value = 10000
        for exec_lookup in (job_exec_get, job_execution_update_get):
            exec_lookup.return_value = job_exec

        with testtools.ExpectedException(ex.CancelingFailed):
            job_manager.cancel_job(job_exec.id)
Beispiel #41
0
    def test_scheduled_edp_job_run(self, job_exec_get, cluster_get,
                                   job_get, run_scheduled_job):
        """_run_job invokes the scheduled-job runner once for a scheduled job.

        The arguments are presumably mocks injected by patch decorators
        outside this view.
        """
        schedule = {
            'job_execution_type': 'scheduled',
            'start': '2015-5-15T01:00Z',
        }
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, {'job_execution_info': schedule})

        active_cluster = u.create_cluster()
        active_cluster.status = "Active"

        # Canned results for the patched lookups.
        job_get.return_value = job
        job_exec_get.return_value = job_exec
        cluster_get.return_value = active_cluster

        job_manager._run_job(job_exec.id)

        self.assertEqual(1, run_scheduled_job.call_count)
Beispiel #42
0
    def test_get_data_sources_with_null_id(self):
        """get_data_sources returns (None, None) when both ids are None."""
        swift_credentials = {
            sw.HADOOP_SWIFT_USERNAME: '******',
            sw.HADOOP_SWIFT_PASSWORD: '******',
        }
        job_configs = {
            'configs': swift_credentials,
            'args': ['swift://ex/i', 'output_path'],
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, job_configs)

        # Clear both data source references on the job execution.
        job_exec.input_id = job_exec.output_id = None

        sources = job_utils.get_data_sources(job_exec, job, {})
        input_source, output_source = sources

        for source in (input_source, output_source):
            self.assertIsNone(source)
Beispiel #43
0
    def test_build_workflow_for_job_java(self):
        """Check the Java workflow XML with plain and proxy credentials.

        The first half supplies Swift credentials through the job configs
        and expects them in the generated <configuration> section. The
        second half enables a proxy domain and expects the proxy
        credentials instead.
        """
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        #
        # These values must match the username/password asserted in the
        # expected XML below.
        configs = {sw.HADOOP_SWIFT_USERNAME: 'admin',
                   sw.HADOOP_SWIFT_PASSWORD: 'admin1'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        configs = {
            'configs': {},
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                          proxy=True)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Beispiel #44
0
    def test_build_workflow_for_job_hive(self, job_binary):
        """Check the Hive workflow XML with plain and proxy credentials.

        The generated XML is parsed and the <hive> element's job-xml,
        configuration properties, script and params are compared against
        expected values, first for the default case and then with a proxy
        domain enabled. ``job_binary`` is presumably a mock injected by a
        patch decorator outside this view.
        """
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
        job_binary.return_value = {"name": "script.q"}

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {
            input_data.id: input_data.url,
            output_data.id: output_data.url
        }

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec.job_configs,
                                                input_data, output_data,
                                                'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        self.assertEqual('/user/hadoop/conf/hive-site.xml',
                         xmlutils.get_text_from_node(hive, 'job-xml'))

        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        # Expected credentials are the ones the Pig workflow test asserts
        # for data sources built by the same fixture helper.
        self.assertEqual(
            {
                'fs.swift.service.sahara.password': 'admin1',
                'fs.swift.service.sahara.username': 'admin'
            }, properties)

        self.assertEqual('script.q',
                         xmlutils.get_text_from_node(hive, 'script'))

        params = xmlutils.get_param_dict(hive)
        self.assertEqual(
            {
                'INPUT': 'swift://ex.sahara/i',
                'OUTPUT': 'swift://ex.sahara/o'
            }, params)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)

        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec.job_configs,
                                                input_data, output_data,
                                                'hadoop', data_source_urls)

        doc = xml.parseString(res)
        hive = doc.getElementsByTagName('hive')[0]
        configuration = hive.getElementsByTagName('configuration')
        properties = xmlutils.get_property_dict(configuration[0])
        # Proxy credentials match those asserted by the other proxy-domain
        # workflow tests that use the same job execution fixture.
        self.assertEqual(
            {
                'fs.swift.service.sahara.domain.name':
                'sahara_proxy_domain',
                'fs.swift.service.sahara.trust.id':
                '0123456789abcdef0123456789abcdef',
                'fs.swift.service.sahara.password':
                '55555555-6666-7777-8888-999999999999',
                'fs.swift.service.sahara.username':
                'job_00000000-1111-2222-3333-4444444444444444'
            }, properties)
Beispiel #45
0
    def test_build_workflow_for_job_java(self):
        """Check the Java workflow XML with plain and proxy credentials.

        The first half supplies Swift credentials through the job configs
        and expects them in the generated <configuration> section. The
        second half enables a proxy domain and expects the proxy
        credentials instead.
        """
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        #
        # These values must match the username/password asserted in the
        # expected XML below.
        configs = {sw.HADOOP_SWIFT_USERNAME: 'admin',
                   sw.HADOOP_SWIFT_PASSWORD: 'admin1'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        configs = {
            'configs': {},
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
                                          proxy=True)
        res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
                                                job_exec.job_configs)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Beispiel #46
0
    def test_build_workflow_swift_configs(self, job_binary):
        """Swift configs come from either the input or the output source.

        Three Pig workflows are built: Swift input with HDFS output, HDFS
        input with Swift output, and HDFS on both sides with a user-supplied
        config. The first two must contain the Swift credentials; the last
        must contain only the user-supplied property. ``job_binary`` is
        presumably a mock injected by a patch decorator outside this view.
        """
        swift_credentials_xml = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>"""

        def build_workflow(input_url, output_url):
            # Uses whichever job/job_exec the enclosing test currently holds.
            source = u.create_data_source(input_url)
            sink = u.create_data_source(output_url)
            urls = {source.id: source.url,
                    sink.id: sink.url}
            return workflow_factory.get_workflow_xml(
                job, u.create_cluster(), job_exec.job_configs,
                source, sink, 'hadoop', urls)

        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        # Swift input, HDFS output.
        workflow = build_workflow('swift://ex/i', 'hdfs://user/hadoop/out')
        self.assertIn(swift_credentials_xml, workflow)

        # HDFS input, Swift output.
        workflow = build_workflow('hdfs://user/hadoop/in', 'swift://ex/o')
        self.assertIn(swift_credentials_xml, workflow)

        # HDFS on both sides: only the explicit config should appear.
        job, job_exec = u.create_job_exec(
            edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
        workflow = build_workflow('hdfs://user/hadoop/in',
                                  'hdfs://user/hadoop/out')

        self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", workflow)
Beispiel #47
0
    def test_build_workflow_for_job_pig(self, job_binary):
        """Check the Pig workflow XML with plain and proxy credentials.

        The default workflow must contain the INPUT/OUTPUT params, the Swift
        credentials and the script name; with a proxy domain enabled the
        configuration must carry the proxy credentials. ``job_binary`` is
        presumably a mock injected by a patch decorator outside this view.
        """
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
        job_binary.return_value = {"name": "script.pig"}

        source = u.create_data_source('swift://ex/i')
        sink = u.create_data_source('swift://ex/o')
        urls_by_id = {source.id: source.url, sink.id: sink.url}

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            source, sink, 'hadoop', urls_by_id)

        expected_params = """
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>"""
        expected_credentials = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>"""

        for fragment in (expected_params,
                         expected_credentials,
                         "<script>script.pig</script>"):
            self.assertIn(fragment, workflow)

        # testing workflow creation with a proxy domain
        self.override_config('use_domain_for_proxy_users', True)
        self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
        job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)

        workflow = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            source, sink, 'hadoop', urls_by_id)

        expected_proxy_credentials = """
      <configuration>
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>
      </configuration>"""
        self.assertIn(expected_proxy_credentials, workflow)
Beispiel #48
0
    def _build_workflow_common(self, job_type, streaming=False, proxy=False):
        """Build a workflow for ``job_type`` and assert on the generated XML.

        :param job_type: job type passed to ``u.create_job_exec``.
        :param streaming: when True, the job is created with streaming
            mapper/reducer configs and the ``<streaming>`` section is
            asserted.
        :param proxy: when True, the workflow is rebuilt with a proxy
            domain enabled and the proxy credentials are asserted; when
            False, the plain Swift username/password are asserted.
        """
        if streaming:
            configs = {'edp.streaming.mapper': '/usr/bin/cat',
                       'edp.streaming.reducer': '/usr/bin/wc'}
            # Wrap the streaming settings under the 'configs' key.
            configs = {'configs': configs}
        else:
            configs = {}

        job, job_exec = u.create_job_exec(job_type, configs)

        input_data = u.create_data_source('swift://ex/i')
        output_data = u.create_data_source('swift://ex/o')
        data_source_urls = {input_data.id: input_data.url,
                            output_data.id: output_data.url}

        res = workflow_factory.get_workflow_xml(
            job, u.create_cluster(), job_exec.job_configs,
            input_data, output_data, 'hadoop', data_source_urls)

        if streaming:
            self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

        # The input/output dirs are asserted regardless of the
        # streaming/proxy flags.
        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        if not proxy:
            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
        else:
            # testing workflow creation with a proxy domain
            self.override_config('use_domain_for_proxy_users', True)
            self.override_config("proxy_user_domain_name",
                                 'sahara_proxy_domain')
            # The job execution is recreated with proxy=True and without
            # the configs built above; the same data sources are reused.
            job, job_exec = u.create_job_exec(job_type, proxy=True)

            res = workflow_factory.get_workflow_xml(
                job, u.create_cluster(), job_exec.job_configs,
                input_data, output_data, 'hadoop', data_source_urls)

            self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.domain.name</name>
          <value>sahara_proxy_domain</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>55555555-6666-7777-8888-999999999999</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.trust.id</name>
          <value>0123456789abcdef0123456789abcdef</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>job_00000000-1111-2222-3333-4444444444444444</value>
        </property>""", res)