Example #1
    def test_build_workflow_swift_configs(self, job_binary):

        # Test that swift configs come from either input or output data sources
        job, job_exec = _create_all_stack(edp.JOB_TYPE_PIG)
        job_binary.return_value = {"name": "script.pig"}

        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('hdfs://user/hadoop/out')

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        # Now the swift source is the output; the same credentials should appear.
        input_data = _create_data_source('hdfs://user/hadoop/in')
        output_data = _create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        # With no swift data sources, only the user-supplied configs should appear.
        job, job_exec = _create_all_stack(
            edp.JOB_TYPE_PIG, configs={'configs': {'dummy': 'value'}})
        input_data = _create_data_source('hdfs://user/hadoop/in')
        output_data = _create_data_source('hdfs://user/hadoop/out')

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
      <configuration>
        <property>
          <name>dummy</name>
          <value>value</value>
        </property>
      </configuration>""", res)
Example #2
    def test_build_workflow_for_job_pig(self, job_binary):

        job, job_exec = _create_all_stack(edp.JOB_TYPE_PIG)
        job_binary.return_value = {"name": "script.pig"}

        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
      <param>OUTPUT=swift://ex.sahara/o</param>""", res)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>""", res)

        self.assertIn("<script>script.pig</script>", res)
Example #3
    def _build_workflow_with_conf_common(self, job_type):
        job, _ = _create_all_stack(job_type)

        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        job_exec = _create_job_exec(job.id,
                                    job_type, configs={"configs": {'c': 'f'}})

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <property>
          <name>c</name>
          <value>f</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)
Example #4
    def test_build_workflow_for_job_java(self):
        # If args include swift paths, user and password values
        # will have to be supplied via configs instead of being
        # lifted from input or output data sources
        configs = {sw.HADOOP_SWIFT_USERNAME: 'admin',
                   sw.HADOOP_SWIFT_PASSWORD: 'admin1'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = _create_all_stack(edp.JOB_TYPE_JAVA, configs)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec)

        self.assertIn("""
      <configuration>
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>
      </configuration>
      <main-class>%s</main-class>
      <java-opts>%s</java-opts>
      <arg>swift://ex.sahara/i</arg>
      <arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
Example #5
    def _build_workflow_common(self, job_type, streaming=False):
        if streaming:
            configs = {'edp.streaming.mapper': '/usr/bin/cat',
                       'edp.streaming.reducer': '/usr/bin/wc'}
            configs = {'configs': configs}
        else:
            configs = {}

        job, job_exec = _create_all_stack(job_type, configs)

        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        if streaming:
            self.assertIn("""
      <streaming>
        <mapper>/usr/bin/cat</mapper>
        <reducer>/usr/bin/wc</reducer>
      </streaming>""", res)

        self.assertIn("""
        <property>
          <name>mapred.output.dir</name>
          <value>swift://ex.sahara/o</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>mapred.input.dir</name>
          <value>swift://ex.sahara/i</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.password</name>
          <value>admin1</value>
        </property>""", res)

        self.assertIn("""
        <property>
          <name>fs.swift.service.sahara.username</name>
          <value>admin</value>
        </property>""", res)
Example #6
def _run_job(job_execution_id):
    ctx = context.ctx()

    job_execution = conductor.job_execution_get(ctx, job_execution_id)

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    # Workflows can only be launched against an Active cluster.
    if cluster.status != 'Active':
        return

    job_execution = _update_job_execution_extra(job_execution, cluster)

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source, output_source = _get_data_sources(job_execution, job)

    # External HDFS data sources may need extra cluster-side configuration
    # before the workflow can reach them.
    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    plugin = _get_plugin(cluster)
    hdfs_user = plugin.get_hdfs_user()
    oozie_server = plugin.get_oozie_server(cluster)

    # Stage the job binaries in a per-job workflow directory on HDFS.
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    wf_xml = workflow_factory.get_workflow_xml(
        job, cluster, job_execution, input_source, output_source)

    path_to_workflow = upload_workflow_file(oozie_server,
                                            wf_dir, wf_xml, hdfs_user)

    # Submit the workflow to Oozie, record the Oozie job id and start time,
    # then kick off the run.
    client = _create_oozie_client(cluster)
    job_params = _get_oozie_job_params(cluster, hdfs_user, path_to_workflow)
    oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                  job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {'oozie_job_id': oozie_job_id,
                             'start_time': datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)
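_run_job defers data-source lookup to _get_data_sources, which is not shown above. One plausible implementation, assuming Java jobs carry their paths in configs/args rather than attached data sources, and that the conductor exposes data_source_get (the call and field names are assumptions):

def _get_data_sources(job_execution, job):
    # Sketch only: Java jobs have no input/output data sources; other job
    # types load them from the conductor by id.
    if job.type == edp.JOB_TYPE_JAVA:
        return None, None

    ctx = context.ctx()
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    return input_source, output_source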