Example #1
0
File: tests.py Project: yjkim/hue
    def setUp(self):
        """Per-test fixture: start the shared Oozie server, create a
        world-writable home directory for the jobsub test user, and retry
        chmod'ing /tmp since it can race with asynchronous resource cleanup.
        """
        OozieServerProvider.setup_class()
        self.cluster.fs.do_as_user('test', self.cluster.fs.create_home_dir,
                                   '/user/jobsub_test')
        # 0777 is a Python 2 octal literal; recursive chmod via positional True.
        self.cluster.fs.do_as_superuser(self.cluster.fs.chmod,
                                        '/user/jobsub_test', 0777, True)
        # NOTE(review): the username literal was redacted by the upstream
        # scraper ('******'); restore the real test username before running.
        self.client = make_logged_in_client(username='******')

        # Ensure access to MR folder.
        # Need to chmod because jobs are submitted as a
        # different user than what was previously used.
        # Retry up to 10 times, sleeping 1s between attempts.
        for i in range(0, 10):
            try:
                self.cluster.fs.do_as_superuser(self.cluster.fs.chmod,
                                                '/tmp',
                                                0777,
                                                recursive=True)
                break
            except Exception, e:
                # chmod failure likely due to async processing of resource deletion.
                # If the directory has improper permissions, should fail later in the test case.
                LOG.warn(
                    "Received the following exception while change mode attempt %d of /tmp: %s"
                    % (i, str(e)))
                time.sleep(1)
Example #2
0
  def setUp(self):
    OozieServerProvider.setup_class()
    self.cluster.fs.do_as_user('test', self.cluster.fs.create_home_dir, '/user/jobsub_test')
    self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, '/user/jobsub_test', 0777, True)
    self.client = make_logged_in_client(username='******')

    # Ensure access to MR folder
    self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, '/tmp', 0777, recursive=True)
Example #3
0
    def setup_class(cls):
        OozieServerProvider.setup_class()
        if not cls.cluster.fs.exists("/tmp"):
            cls.cluster.fs.do_as_superuser(cls.cluster.fs.mkdir, "/tmp")
        cls.cluster.fs.do_as_superuser(cls.cluster.fs.chmod, "/tmp", 0777)

        # Install examples
        import jobsub.management.commands.jobsub_setup as jobsub_setup
        if not jobsub_setup.Command().has_been_setup():
            jobsub_setup.Command().handle()

        cls.sleep_design_id = OozieDesign.objects.get(name='sleep_job').id
Example #4
0
  def setup_class(cls):
    OozieServerProvider.setup_class()
    if not cls.cluster.fs.exists("/tmp"):
      cls.cluster.fs.do_as_superuser(cls.cluster.fs.mkdir, "/tmp")
    cls.cluster.fs.do_as_superuser(cls.cluster.fs.chmod, "/tmp", 0777)

    # Install examples
    import jobsub.management.commands.jobsub_setup as jobsub_setup
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()

    cls.sleep_design_id = OozieDesign.objects.get(name='sleep_job').id
Example #5
0
    def setUp(self):
        OozieServerProvider.setup_class()
        self.cluster.fs.do_as_user('test', self.cluster.fs.create_home_dir,
                                   '/user/jobsub_test')
        self.cluster.fs.do_as_superuser(self.cluster.fs.chmod,
                                        '/user/jobsub_test', 0777, True)
        self.client = make_logged_in_client(username='******')

        # Ensure access to MR folder
        self.cluster.fs.do_as_superuser(self.cluster.fs.chmod,
                                        '/tmp',
                                        0777,
                                        recursive=True)
Example #6
0
    def setUp(self):
        """Per-test fixture for job browser tests.

        Creates a fresh numbered user (test1, test2, ... for easy cleanup),
        grants it app access, switches the HDFS client to it, installs the
        examples, then submits the sleep workflow and records its Hadoop
        job id for the assertions in the test methods.
        """
        TestJobBrowserWithHadoop.user_count += 1
        # NOTE(review): the username prefix literal was redacted upstream
        # ('******'); restore the real prefix before running.
        self.username = '******' + str(TestJobBrowserWithHadoop.user_count)
        self.home_dir = '/user/%s' % self.username
        self.cluster.fs.do_as_user(self.username,
                                   self.cluster.fs.create_home_dir,
                                   self.home_dir)

        self.client = make_logged_in_client(username=self.username,
                                            is_superuser=False,
                                            groupname='test')
        self.user = User.objects.get(username=self.username)
        grant_access(self.username, 'test', 'jobsub')
        grant_access(self.username, 'test', 'jobbrowser')
        grant_access(self.username, 'test', 'oozie')
        add_to_group(self.username)

        # Remember the previous fs user so tearDown can presumably restore it
        # — TODO confirm against the teardown code (not visible here).
        self.prev_user = self.cluster.fs.user
        self.cluster.fs.setuser(self.username)

        self.install_examples()
        self.design = self.create_design()

        # Run the sleep example, since it doesn't require user home directory
        design_id = self.design.id
        response = self.client.post(reverse('oozie:submit_workflow',
                                            args=[design_id]),
                                    data={
                                        u'form-MAX_NUM_FORMS': [u''],
                                        u'form-INITIAL_FORMS': [u'1'],
                                        u'form-0-name':
                                        [u'REDUCER_SLEEP_TIME'],
                                        u'form-0-value': [u'1'],
                                        u'form-TOTAL_FORMS': [u'1']
                                    },
                                    follow=True)
        oozie_jobid = response.context['oozie_workflow'].id
        # Block until the workflow finishes (up to 120s, polling every 1s).
        OozieServerProvider.wait_until_completion(oozie_jobid,
                                                  timeout=120,
                                                  step=1)

        self.hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
        self.hadoop_job_id_short = views.get_shorter_id(self.hadoop_job_id)
Example #7
0
  def setUp(self):
    """Per-test fixture: start Oozie, create the jobsub test home dir, and
    retry chmod'ing /tmp since it can race with async resource deletion.
    """
    OozieServerProvider.setup_class()
    self.cluster.fs.do_as_user('test', self.cluster.fs.create_home_dir, '/user/jobsub_test')
    self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, '/user/jobsub_test', 0777, True)
    # NOTE(review): username literal redacted upstream ('******').
    self.client = make_logged_in_client(username='******')

    # Ensure access to MR folder.
    # Need to chmod because jobs are submitted as a
    # different user than what was previously used.
    # Up to 10 attempts, 1s apart.
    for i in range(0,10):
      try:
        self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, '/tmp', 0777, recursive=True)
        break
      except Exception, e:
        # chmod failure likely due to async processing of resource deletion.
        # If the directory has improper permissions, should fail later in the test case.
        LOG.warn("Received the following exception while change mode attempt %d of /tmp: %s" % (i, str(e)))
        time.sleep(1)
Example #8
0
  def setUp(self):
    """Per-test fixture for job browser tests.

    Creates a fresh numbered user (test1, test2, ... for easy cleanup),
    grants it app access, switches the HDFS client to it, installs the
    examples, then submits the sleep workflow and records its Hadoop job id.
    """
    TestJobBrowserWithHadoop.user_count += 1
    # NOTE(review): the username prefix literal was redacted upstream ('******').
    self.username = '******' + str(TestJobBrowserWithHadoop.user_count)
    self.home_dir = '/user/%s' % self.username
    self.cluster.fs.do_as_user(self.username, self.cluster.fs.create_home_dir, self.home_dir)

    self.client = make_logged_in_client(username=self.username, is_superuser=False, groupname='test')
    self.user = User.objects.get(username=self.username)
    grant_access(self.username, 'test', 'jobsub')
    grant_access(self.username, 'test', 'jobbrowser')
    grant_access(self.username, 'test', 'oozie')
    add_to_group(self.username)

    # Remember the previous fs user so it can presumably be restored in
    # teardown — TODO confirm (teardown not visible here).
    self.prev_user = self.cluster.fs.user
    self.cluster.fs.setuser(self.username)

    self.install_examples()
    self.design = self.create_design()

    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(reverse('oozie:submit_workflow',
                                args=[design_id]),
                                data={u'form-MAX_NUM_FORMS': [u''],
                                      u'form-INITIAL_FORMS': [u'1'],
                                      u'form-0-name': [u'REDUCER_SLEEP_TIME'],
                                      u'form-0-value': [u'1'],
                                      u'form-TOTAL_FORMS': [u'1']},
                                follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    # Block until the workflow finishes (up to 120s, polling every 1s).
    OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)

    self.hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    self.hadoop_job_id_short = views.get_shorter_id(self.hadoop_job_id)
Example #9
0
File: tests.py Project: yjkim/hue
 def setup_class(cls):
     OozieServerProvider.setup_class()
     if not cls.cluster.fs.exists("/tmp"):
         cls.cluster.fs.do_as_superuser(cls.cluster.fs.mkdir, "/tmp")
     cls.cluster.fs.do_as_superuser(cls.cluster.fs.chmod, "/tmp", 0777)
Example #10
0
File: tests.py Project: yjkim/hue
    def test_failed_jobs(self):
        """Test jobs with genuine failure, not just killed.

        Submits a MapReduce design whose mapper/combiner/reducer classes do
        not exist, so the reduce phase fails, then checks that the job
        browser lists the job under 'failed' (not 'killed') and that the
        task pages report the map as succeeded and the reduce as failed.
        """
        # Create design that will fail because the script file isn't there
        INPUT_DIR = self.home_dir + '/input'
        OUTPUT_DIR = self.home_dir + '/output'
        try:
            self.cluster.fs.mkdir(self.home_dir + "/jt-test_failed_jobs")
            self.cluster.fs.mkdir(INPUT_DIR)
            self.cluster.fs.rmtree(OUTPUT_DIR)
        except:
            # rmtree probably failed here.
            pass
        # NOTE(review): the backslash-continued job_properties literal is
        # whitespace-sensitive; the '.dne' class names are intentionally bogus.
        response = self.client.post(reverse(
            'jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), {
                'name': ['test_failed_jobs-1'],
                'description': ['description test_failed_jobs-1'],
                'args':
                '',
                'jar_path':
                '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
                'prepares':
                '[]',
                'archives':
                '[]',
                'files':
                '[]',
                'job_properties': [
                    '[{"name":"mapred.input.dir","value":"%s"},\
            {"name":"mapred.output.dir","value":"%s"},\
            {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]'
                    % (INPUT_DIR, OUTPUT_DIR)
                ]
            },
                                    HTTP_X_REQUESTED_WITH='XMLHttpRequest',
                                    follow=True)

        # Submit the job
        design_dict = json.loads(response.content)
        design_id = int(design_dict['id'])
        response = self.client.post(reverse('oozie:submit_workflow',
                                            args=[design_id]),
                                    data={
                                        u'form-MAX_NUM_FORMS': [u''],
                                        u'form-INITIAL_FORMS': [u'1'],
                                        u'form-0-name':
                                        [u'REDUCER_SLEEP_TIME'],
                                        u'form-0-value': [u'1'],
                                        u'form-TOTAL_FORMS': [u'1']
                                    },
                                    follow=True)
        oozie_jobid = response.context['oozie_workflow'].id
        job = OozieServerProvider.wait_until_completion(oozie_jobid,
                                                        timeout=120,
                                                        step=1)
        hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
        hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

        # Select only killed jobs (should be absent)
        # Taking advantage of the fact new jobs are at the top of the list!
        response = self.client.get('/jobbrowser/jobs/?state=killed')
        assert_false(hadoop_job_id_short in response.content)

        # Select only failed jobs (should be present)
        # Map job should succeed. Reduce job should fail.
        response = self.client.get('/jobbrowser/jobs/?state=failed')
        assert_true(hadoop_job_id_short in response.content)

        # The single job view should have the failed task table
        response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id, ))
        html = response.content.lower()
        assert_true('failed task' in html)

        # The map task should say success (empty input)
        map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
        response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' %
                                   (hadoop_job_id, map_task_id))
        assert_true('succeed' in response.content)
        assert_true('failed' not in response.content)

        # The reduce task should say failed
        reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
        response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' %
                                   (hadoop_job_id, reduce_task_id))
        assert_true('succeed' not in response.content)
        assert_true('failed' in response.content)

        # Selecting by failed state should include the failed map
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id, ))
        assert_true('r_000000' in response.content)
        assert_true('m_000000' not in response.content)
Example #11
0
File: tests.py Project: yjkim/hue
    def test_job(self):
        """
        Test new job views.

        The status of the jobs should be the same as the status reported back by oozie.
        In this case, all jobs should succeed.
        """
        # Run the sleep example, since it doesn't require user home directory
        design_id = self.design.id
        response = self.client.post(reverse('oozie:submit_workflow',
                                            args=[design_id]),
                                    data={
                                        u'form-MAX_NUM_FORMS': [u''],
                                        u'form-INITIAL_FORMS': [u'1'],
                                        u'form-0-name':
                                        [u'REDUCER_SLEEP_TIME'],
                                        u'form-0-value': [u'1'],
                                        u'form-TOTAL_FORMS': [u'1']
                                    },
                                    follow=True)
        oozie_jobid = response.context['oozie_workflow'].id
        # Block until the workflow finishes (up to 120s, polling every 1s).
        OozieServerProvider.wait_until_completion(oozie_jobid,
                                                  timeout=120,
                                                  step=1)
        hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
        hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

        # All jobs page and fetch job ID
        # Taking advantage of the fact new jobs are at the top of the list!
        response = self.client.get('/jobbrowser/jobs/')
        assert_true(hadoop_job_id_short in response.content, response.content)

        # Make sure job succeeded
        response = self.client.get('/jobbrowser/jobs/?state=completed')
        assert_true(hadoop_job_id_short in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=failed')
        assert_false(hadoop_job_id_short in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=running')
        assert_false(hadoop_job_id_short in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=killed')
        assert_false(hadoop_job_id_short in response.content)

        # Check sharing permissions
        # Login as ourself
        finish = SHARE_JOBS.set_for_testing(True)
        try:
            # NOTE(review): the next statement was mangled by upstream
            # redaction ('******'); several sharing-permission requests and
            # the make_logged_in_client call were fused together and the
            # original code is lost — restore from upstream tests.py.
            response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me',
                                              is_superuser=False,
                                              groupname='test')
        grant_access("not_me", "test", "jobbrowser")

        finish = SHARE_JOBS.set_for_testing(True)
        try:
            # NOTE(review): this statement is likewise redaction-garbled;
            # the original requests for the not_me client are lost.
            response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id)

        # Check some counters for single job.
        counters = response.context['job'].counters
        counters_file_bytes_written = counters[
            'org.apache.hadoop.mapreduce.FileSystemCounter']['counters'][
                'FILE_BYTES_WRITTEN']
        assert_true(counters_file_bytes_written['map'] > 0)
        assert_true(counters_file_bytes_written['reduce'] > 0)

        # We can't just check the complete contents of the python map because the
        # SLOTS_MILLIS_* entries have a variable number of milliseconds from
        # run-to-run.
        # (2L/1L/0L are Python 2 long literals.)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['TOTAL_LAUNCHED_MAPS']['total'], 2L)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['TOTAL_LAUNCHED_REDUCES']['total'], 1L)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0L)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0L)
        assert_true(response.context['job'].
                    counters['org.apache.hadoop.mapreduce.JobCounter']
                    ['counters']['SLOTS_MILLIS_MAPS']['total'] > 0)
        assert_true(response.context['job'].
                    counters['org.apache.hadoop.mapreduce.JobCounter']
                    ['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0)

        # There should be 4 tasks for this job: cleanup, setup, map, reduce
        response = self.client.get('/jobbrowser/jobs/%s/tasks' %
                                   (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 4)
        # Select by tasktype
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 1)
        # Select by taskstate
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?taskstate=succeeded' %
            (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 4)
        # Select by text
        response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' %
                                   (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 1)

        # Test job single logs page
        response = self.client.get('/jobbrowser/jobs/%s/single_logs' %
                                   (hadoop_job_id))
        assert_true('syslog' in response.content)
        assert_true(
            '<div class="tab-pane active" id="logsSysLog">' in response.content
            or '<div class="tab-pane active" id="logsStdErr">'
            in response.content or  # Depending on Hadoop
            '<div class="tab-pane active" id="logsStdOut">'
            in response.content,  # For jenkins
            response.content)
Example #12
0
  def test_job(self):
    """
    Test new job views.

    The status of the jobs should be the same as the status reported back by oozie.
    In this case, all jobs should succeed.
    """
    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(reverse('oozie:submit_workflow',
                                args=[design_id]),
                                data={u'form-MAX_NUM_FORMS': [u''],
                                      u'form-INITIAL_FORMS': [u'1'],
                                      u'form-0-name': [u'REDUCER_SLEEP_TIME'],
                                      u'form-0-value': [u'1'],
                                      u'form-TOTAL_FORMS': [u'1']},
                                follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    # Block until the workflow finishes (up to 120s, polling every 1s).
    OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # All jobs page and fetch job ID
    # Taking advantage of the fact new jobs are at the top of the list!
    response = self.client.get('/jobbrowser/jobs/')
    assert_true(hadoop_job_id_short in response.content, response.content)

    # Make sure job succeeded
    response = self.client.get('/jobbrowser/jobs/?state=completed')
    assert_true(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=failed')
    assert_false(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=running')
    assert_false(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Check sharing permissions
    # Login as ourself
    finish = SHARE_JOBS.set_for_testing(True)
    try:
      # NOTE(review): the next statement was mangled by upstream redaction
      # ('******'); several statements were fused and the original code is
      # lost — restore from upstream tests.py.
      response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me', is_superuser=False, groupname='test')
    grant_access("not_me", "test", "jobbrowser")

    finish = SHARE_JOBS.set_for_testing(True)
    try:
      # NOTE(review): this statement is likewise redaction-garbled.
      response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id)

    # Check some counters for single job.
    counters = response.context['job'].counters
    counters_file_bytes_written = counters['org.apache.hadoop.mapreduce.FileSystemCounter']['counters']['FILE_BYTES_WRITTEN']
    assert_true(counters_file_bytes_written['map'] > 0)
    assert_true(counters_file_bytes_written['reduce'] > 0)

    # We can't just check the complete contents of the python map because the
    # SLOTS_MILLIS_* entries have a variable number of milliseconds from
    # run-to-run.
    # (2L/1L/0L are Python 2 long literals.)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_MAPS']['total'], 2L)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_REDUCES']['total'], 1L)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0L)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0L)
    assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_MAPS']['total'] > 0)
    assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0)

    # There should be 4 tasks for this job: cleanup, setup, map, reduce
    response = self.client.get('/jobbrowser/jobs/%s/tasks' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 4)
    # Select by tasktype
    response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 1)
    # Select by taskstate
    response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 4)
    # Select by text
    response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 1)

    # Test job single logs page
    response = self.client.get('/jobbrowser/jobs/%s/single_logs' % (hadoop_job_id))
    assert_true('syslog' in response.content)
    assert_true('<div class="tab-pane active" id="logsSysLog">' in response.content or
                '<div class="tab-pane active" id="logsStdErr">' in response.content or # Depending on Hadoop
                '<div class="tab-pane active" id="logsStdOut">' in response.content, # For jenkins
                response.content)
Example #13
0
 def setup_class(cls):
   OozieServerProvider.setup_class()
   if not cls.cluster.fs.exists("/tmp"):
     cls.cluster.fs.do_as_superuser(cls.cluster.fs.mkdir, "/tmp")
   cls.cluster.fs.do_as_superuser(cls.cluster.fs.chmod, "/tmp", 0777)
Example #14
0
    def test_failed_jobs(self):
        """Test jobs with genuine failure, not just killed.

        Submits a MapReduce design whose mapper/combiner/reducer classes do
        not exist, so the reduce phase fails, then checks that the job
        browser lists the job as 'failed' (not 'killed') and the task pages
        report the map as succeeded and the reduce as failed.
        """
        # Create design that will fail because the script file isn't there
        INPUT_DIR = self.home_dir + '/input'
        OUTPUT_DIR = self.home_dir + '/output'
        try:
            self.cluster.fs.mkdir(self.home_dir + "/jt-test_failed_jobs")
            self.cluster.fs.mkdir(INPUT_DIR)
            self.cluster.fs.rmtree(OUTPUT_DIR)
        except:
            # rmtree probably failed here.
            pass
        # NOTE(review): the backslash-continued job_properties literal is
        # whitespace-sensitive; the '.dne' class names are intentionally bogus.
        response = self.client.post('/jobsub/new_design/mapreduce', {
            'wf-name': ['test_failed_jobs-1'],
            'wf-description': ['description test_failed_jobs-1'],
            'action-args': [''],
            'action-jar_path':
            ['/user/hue/jobsub/examples/hadoop-examples.jar'],
            'action-archives': ['[]'],
            'action-job_properties': [
                '[{"name":"mapred.input.dir","value":"%s"},\
            {"name":"mapred.output.dir","value":"%s"},\
            {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]'
                % (INPUT_DIR, OUTPUT_DIR)
            ],
            'action-files': ['[]']
        },
                                    follow=True)
        designs = json.loads(response.context['designs'])

        # Submit the job
        design_id = designs[0]['id']
        response = self.client.post("/jobsub/submit_design/%d" % design_id,
                                    follow=True)
        oozie_jobid = response.context['jobid']
        # Wait up to 500s for the doomed workflow to finish.
        job = OozieServerProvider.wait_until_completion(oozie_jobid,
                                                        timeout=500,
                                                        step=1)
        hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)

        # Select only killed jobs (should be absent)
        # Taking advantage of the fact new jobs are at the top of the list!
        response = self.client.get('/jobbrowser/jobs/?state=killed')
        assert_false(hadoop_job_id in response.content)

        # Select only failed jobs (should be present)
        # Map job should succeed. Reduce job should fail.
        response = self.client.get('/jobbrowser/jobs/?state=failed')
        assert_true(hadoop_job_id in response.content)

        # The single job view should have the failed task table
        response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id, ))
        html = response.content.lower()
        assert_true('failed task' in html)

        # The map task should say success (empty input)
        map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
        response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' %
                                   (hadoop_job_id, map_task_id))
        assert_true('succeed' in response.content)
        assert_true('failed' not in response.content)

        # The reduce task should say failed
        reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
        response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' %
                                   (hadoop_job_id, reduce_task_id))
        assert_true('succeed' not in response.content)
        assert_true('failed' in response.content)

        # Selecting by failed state should include the failed map
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id, ))
        assert_true('_r_000000' in response.content)
        assert_true('_m_000000' not in response.content)
Example #15
0
  def test_failed_jobs(self):
    """
    Test jobs with genuine failure, not just killed
    """
    # Create design that will fail because the script file isn't there
    INPUT_DIR = self.home_dir + '/input'
    OUTPUT_DIR = self.home_dir + '/output'
    try:
        self.cluster.fs.mkdir(self.home_dir + "/jt-test_failed_jobs")
        self.cluster.fs.mkdir(INPUT_DIR)
        self.cluster.fs.rmtree(OUTPUT_DIR)
    except:
        # rmtree probably failed here.
        pass
    # The '.dne' (does-not-exist) class names make the reduce phase fail;
    # the backslash-continued job_properties literal is whitespace-sensitive.
    response = self.client.post(reverse('jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), {
        'name': ['test_failed_jobs-1'],
        'description': ['description test_failed_jobs-1'],
        'args': '',
        'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
        'prepares': '[]',
        'archives': '[]',
        'files': '[]',
        'job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\
            {"name":"mapred.output.dir","value":"%s"},\
            {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)]
        }, HTTP_X_REQUESTED_WITH='XMLHttpRequest', follow=True)

    # Submit the job
    design_dict = json.loads(response.content)
    design_id = int(design_dict['id'])
    response = self.client.post(reverse('oozie:submit_workflow',
                                args=[design_id]),
                                data={u'form-MAX_NUM_FORMS': [u''],
                                      u'form-INITIAL_FORMS': [u'1'],
                                      u'form-0-name': [u'REDUCER_SLEEP_TIME'],
                                      u'form-0-value': [u'1'],
                                      u'form-TOTAL_FORMS': [u'1']},
                                follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    # Wait for the doomed workflow to finish (up to 120s, polling every 1s).
    job = OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # Select only killed jobs (should be absent)
    # Taking advantage of the fact new jobs are at the top of the list!
    response = self.client.get('/jobbrowser/jobs/?state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Select only failed jobs (should be present)
    # Map job should succeed. Reduce job should fail.
    response = self.client.get('/jobbrowser/jobs/?state=failed')
    assert_true(hadoop_job_id_short in response.content)

    # The single job view should have the failed task table
    response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
    html = response.content.lower()
    assert_true('failed task' in html)

    # The map task should say success (empty input)
    map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
    assert_true('succeed' in response.content)
    assert_true('failed' not in response.content)

    # The reduce task should say failed
    reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
    assert_true('succeed' not in response.content)
    assert_true('failed' in response.content)

    # Selecting by failed state should include the failed map
    response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
    assert_true('r_000000' in response.content)
    assert_true('m_000000' not in response.content)
Example #16
0
    def test_job(self):
        """
    Test new job views.

    The status of the jobs should be the same as the status reported back by oozie.
    In this case, all jobs should succeed.
    """
        # Clone design
        # The test fixture owns no designs yet; clone the shared sleep design
        # so we can submit it as ourselves.
        assert_equal(
            0,
            OozieDesign.objects.filter(owner__username=self.username).count())
        self.client.post('/jobsub/clone_design/%d' % self.sleep_design_id)
        assert_equal(
            1,
            OozieDesign.objects.filter(owner__username=self.username).count())

        # Run the sleep example, since it doesn't require user home directory
        design_id = OozieDesign.objects.get(owner__username=self.username).id
        response = self.client.post("/jobsub/submit_design/%d" % (design_id, ),
                                    dict(map_sleep_time=1,
                                         num_maps=1,
                                         num_reduces=1,
                                         reduce_sleep_time=1),
                                    follow=True)
        oozie_jobid = response.context['jobid']
        # Block until Oozie reports the workflow finished (up to 120s).
        job = OozieServerProvider.wait_until_completion(oozie_jobid,
                                                        timeout=120,
                                                        step=1)
        hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)

        # All jobs page and fetch job ID
        # Taking advantage of the fact new jobs are at the top of the list!
        response = self.client.get('/jobbrowser/jobs/')
        assert_true(hadoop_job_id in response.content)

        # Make sure job succeeded
        # The job must show up only under the "completed" filter.
        response = self.client.get('/jobbrowser/jobs/?state=completed')
        assert_true(hadoop_job_id in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=failed')
        assert_false(hadoop_job_id in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=running')
        assert_false(hadoop_job_id in response.content)
        response = self.client.get('/jobbrowser/jobs/?state=killed')
        assert_false(hadoop_job_id in response.content)

        # Check sharing permissions
        # Login as ourself
        # NOTE(review): the lines below were mangled when this example was
        # scraped/censored ('******' spans merged several statements; the
        # except/finally for the try blocks is missing). Restore from the
        # upstream Hue tests.py before relying on this code.
        finish = SHARE_JOBS.set_for_testing(True)
        try:
            response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me',
                                              is_superuser=False,
                                              groupname='test')
        grant_access("not_me", "test", "jobbrowser")

        finish = SHARE_JOBS.set_for_testing(True)
        try:
            response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id)

        # Check some counters for single job.
        counters = response.context['job'].counters
        counters_file_bytes_written = counters[
            'org.apache.hadoop.mapreduce.FileSystemCounter']['counters'][
                'FILE_BYTES_WRITTEN']
        # Both phases ran, so both must have written at least one byte.
        assert_true(counters_file_bytes_written['map'] > 0)
        assert_true(counters_file_bytes_written['reduce'] > 0)

        # We can't just check the complete contents of the python map because the
        # SLOTS_MILLIS_* entries have a variable number of milliseconds from
        # run-to-run.
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['TOTAL_LAUNCHED_MAPS']['total'], 1)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['TOTAL_LAUNCHED_REDUCES']['total'], 1)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0)
        assert_equal(
            response.context['job'].
            counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
            ['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0)
        assert_true(response.context['job'].
                    counters['org.apache.hadoop.mapreduce.JobCounter']
                    ['counters']['SLOTS_MILLIS_MAPS']['total'] > 0)
        assert_true(response.context['job'].
                    counters['org.apache.hadoop.mapreduce.JobCounter']
                    ['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0)

        # There should be 4 tasks for this job: cleanup, setup, map, reduce
        # NOTE(review): assert_true(x, 4) treats 4 as the failure MESSAGE and
        # only checks truthiness of len(...); these four checks were probably
        # meant to be assert_equal(len(...), N). Same pattern repeats below.
        response = self.client.get('/jobbrowser/jobs/%s/tasks' %
                                   (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 4)
        # Select by tasktype
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 1)
        # Select by taskstate
        response = self.client.get(
            '/jobbrowser/jobs/%s/tasks?taskstate=succeeded' %
            (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 4)
        # Select by text
        response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' %
                                   (hadoop_job_id, ))
        assert_true(len(response.context['page'].object_list), 1)

        # Test job single logs page
        response = self.client.get('/jobbrowser/jobs/%s/single_logs' %
                                   (hadoop_job_id))
        assert_true('syslog' in response.content)
Example #17
0
    def test_jobsub_setup_and_run_samples(self):
        """
        Exercise jobsub_setup, then run the sleep example end-to-end.

        Installs the sample designs if needed, clones the sleep design and
        submits it, then clones a second sample and submits again, asserting
        that each Oozie job finishes SUCCEEDED.
        """

        def _submit_and_verify(design_id, wait_timeout):
            # Submit one design and block until the Oozie job succeeds.
            response = self.client.post(
                '/jobsub/submit_design/%d' % design_id,
                {'num_reduces': 1,
                 'num_maps': 1,
                 'map_sleep_time': 1,
                 'reduce_sleep_time': 1},
                follow=True)

            # The dashboard must show the job in one of the live/terminal states
            # and reference the submitted design id.
            assert_true(
                sum([status in response.content
                     for status in ('PREP', 'OK', 'DONE')]) > 0)
            assert_true(str(design_id) in response.content)

            oozie_job_id = response.context['jobid']
            job = OozieServerProvider.wait_until_completion(oozie_job_id,
                                                            timeout=wait_timeout,
                                                            step=1)
            logs = OozieServerProvider.oozie.get_job_log(oozie_job_id)
            # Pass the Oozie log as the failure message for easier debugging.
            assert_equal('SUCCEEDED', job.status, logs)

        # Install the samples on first run, then act as the jobsub test user.
        if not jobsub_setup.Command().has_been_setup():
            jobsub_setup.Command().handle()
        self.cluster.fs.setuser('jobsub_test')

        # Setup must have installed exactly these designs and actions.
        assert_equal(
            3,
            OozieDesign.objects.filter(owner__username='******').count())
        assert_equal(
            2,
            OozieMapreduceAction.objects.filter(
                ooziedesign__owner__username='******').count())
        assert_equal(
            1,
            OozieStreamingAction.objects.filter(
                ooziedesign__owner__username='******').count())

        # Make sure sample user got created.
        assert_equal(1, User.objects.filter(username='******').count())

        # Clone the sleep design; we must start with no designs of our own.
        assert_equal(
            0,
            OozieDesign.objects.filter(owner__username='******').count())
        sample_id = OozieDesign.objects.get(name='sleep_job',
                                            owner__username='******').id

        self.client.post('/jobsub/clone_design/%d' % sample_id)
        assert_equal(
            1,
            OozieDesign.objects.filter(owner__username='******').count())
        cloned_id = OozieDesign.objects.get(owner__username='******').id

        # Submit and run the cloned sleep sample.
        _submit_and_verify(cloned_id, 120)

        # Grep: clone a second sample design.
        designs_before = OozieDesign.objects.filter(
            owner__username='******').count()
        grep_id = OozieDesign.objects.get(name='grep_example').id

        self.client.post('/jobsub/clone_design/%d' % grep_id)
        assert_equal(
            designs_before + 1,
            OozieDesign.objects.filter(owner__username='******').count())
        # NOTE(review): despite cloning grep_example above, the design
        # re-submitted here is the sleep_job clone (name__contains='sleep_job')
        # -- confirm against upstream whether that is intentional.
        rerun_id = OozieDesign.objects.get(owner__username='******',
                                           name__contains='sleep_job').id

        # Submit again with a shorter completion timeout.
        _submit_and_verify(rerun_id, 60)
Example #18
0
 def setUp(self):
   # Bring up (or reuse) the shared Oozie test server and cluster fixtures.
   OozieServerProvider.setup_class()
   # Create the 'test' user's home directory as that user, then open its
   # permissions recursively (0777) -- presumably so jobs submitted as other
   # users can write into it; confirm against sibling setUp methods.
   self.cluster.fs.do_as_user('test', self.cluster.fs.create_home_dir, '/user/test')
   self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, '/user/test', 0777, True)
   # HTTP client logged in with the default test credentials.
   self.client = make_logged_in_client()
Example #19
0
File: tests.py Project: ymc/hue
 def setup_class(cls):
   # All fixture setup is delegated to the shared Oozie test-server provider.
   OozieServerProvider.setup_class()
Example #20
0
  def test_jobsub_setup_and_run_samples(self):
    """
    Exercise jobsub_setup, then run the sleep example twice end-to-end.
    """
    # Install the sample designs once, then impersonate the jobsub test user.
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()
    self.cluster.fs.setuser('jobsub_test')

    # Setup must have created exactly these sample designs and actions.
    assert_equal(3, OozieDesign.objects.filter(owner__username='******').count())
    assert_equal(2, OozieMapreduceAction.objects.filter(ooziedesign__owner__username='******').count())
    assert_equal(1, OozieStreamingAction.objects.filter(ooziedesign__owner__username='******').count())

    # Make sure sample user got created.
    assert_equal(1, User.objects.filter(username='******').count())

    # Clone the sleep design; we must start with no designs of our own.
    assert_equal(0, OozieDesign.objects.filter(owner__username='******').count())
    sample_id = OozieDesign.objects.get(name='sleep_job', owner__username='******').id

    self.client.post('/jobsub/clone_design/%d' % sample_id)
    assert_equal(1, OozieDesign.objects.filter(owner__username='******').count())
    cloned_id = OozieDesign.objects.get(owner__username='******').id

    # Submit the cloned sleep sample and wait for Oozie to finish it.
    sleep_params = {
        'num_reduces': 1,
        'num_maps': 1,
        'map_sleep_time': 1,
        'reduce_sleep_time': 1}
    response = self.client.post('/jobsub/submit_design/%d' % cloned_id,
                                sleep_params, follow=True)

    # Dashboard must show a live/terminal state and reference the design id.
    assert_true(sum([state in response.content for state in ('PREP', 'OK', 'DONE')]) > 0)
    assert_true(str(cloned_id) in response.content)

    oozie_job_id = response.context['jobid']
    job = OozieServerProvider.wait_until_completion(oozie_job_id, timeout=120, step=1)
    logs = OozieServerProvider.oozie.get_job_log(oozie_job_id)
    # The Oozie log becomes the failure message if the status is wrong.
    assert_equal('SUCCEEDED', job.status, logs)


    # Grep: clone a second sample design.
    designs_before = OozieDesign.objects.filter(owner__username='******').count()
    grep_id = OozieDesign.objects.get(name='grep_example').id

    self.client.post('/jobsub/clone_design/%d' % grep_id)
    assert_equal(designs_before + 1,
                 OozieDesign.objects.filter(owner__username='******').count())
    # NOTE(review): although grep_example was just cloned, the design
    # re-submitted below is the sleep_job clone -- confirm upstream intent.
    rerun_id = OozieDesign.objects.get(owner__username='******',
                                       name__contains='sleep_job').id

    # Submit again with a shorter completion timeout.
    response = self.client.post('/jobsub/submit_design/%d' % rerun_id,
                                sleep_params, follow=True)

    assert_true(sum([state in response.content for state in ('PREP', 'OK', 'DONE')]) > 0)
    assert_true(str(rerun_id) in response.content)

    oozie_job_id = response.context['jobid']
    job = OozieServerProvider.wait_until_completion(oozie_job_id, timeout=60, step=1)
    logs = OozieServerProvider.oozie.get_job_log(oozie_job_id)
    assert_equal('SUCCEEDED', job.status, logs)
Example #21
0
  def test_job(self):
    """
    Test new job views.

    The status of the jobs should be the same as the status reported back by oozie.
    In this case, all jobs should succeed.
    """
    # Clone design
    # The fixture user owns no designs yet; clone the shared sleep design.
    assert_equal(0, OozieDesign.objects.filter(owner__username=self.username).count())
    self.client.post('/jobsub/clone_design/%d' % self.sleep_design_id)
    assert_equal(1, OozieDesign.objects.filter(owner__username=self.username).count())

    # Run the sleep example, since it doesn't require user home directory
    design_id = OozieDesign.objects.get(owner__username=self.username).id
    response = self.client.post("/jobsub/submit_design/%d" % (design_id,),
      dict(map_sleep_time=1,
           num_maps=1,
           num_reduces=1,
           reduce_sleep_time=1),
      follow=True)
    oozie_jobid = response.context['jobid']
    # Block until Oozie reports the workflow finished (up to 120s).
    job = OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # All jobs page and fetch job ID
    # Taking advantage of the fact new jobs are at the top of the list!
    response = self.client.get('/jobbrowser/jobs/')
    assert_true(hadoop_job_id_short in response.content)

    # Make sure job succeeded
    # The job must appear only under the "completed" filter.
    response = self.client.get('/jobbrowser/jobs/?state=completed')
    assert_true(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=failed')
    assert_false(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=running')
    assert_false(hadoop_job_id_short in response.content)
    response = self.client.get('/jobbrowser/jobs/?state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Check sharing permissions
    # Login as ourself
    # NOTE(review): the lines below were mangled when this example was
    # scraped/censored ('******' spans merged several statements; the
    # except/finally for the try blocks is missing). Restore from the
    # upstream Hue tests.py before relying on this code.
    finish = SHARE_JOBS.set_for_testing(True)
    try:
      response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me', is_superuser=False, groupname='test')
    grant_access("not_me", "test", "jobbrowser")

    finish = SHARE_JOBS.set_for_testing(True)
    try:
      response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id)

    # Check some counters for single job.
    counters = response.context['job'].counters
    counters_file_bytes_written = counters['org.apache.hadoop.mapreduce.FileSystemCounter']['counters']['FILE_BYTES_WRITTEN']
    # Both phases ran, so both must have written at least one byte.
    assert_true(counters_file_bytes_written['map'] > 0)
    assert_true(counters_file_bytes_written['reduce'] > 0)

    # We can't just check the complete contents of the python map because the
    # SLOTS_MILLIS_* entries have a variable number of milliseconds from
    # run-to-run.
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_MAPS']['total'], 1)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_REDUCES']['total'], 1)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0)
    assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0)
    assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_MAPS']['total'] > 0)
    assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0)

    # There should be 4 tasks for this job: cleanup, setup, map, reduce
    # NOTE(review): assert_true(x, 4) treats the 4 as the failure MESSAGE and
    # only checks truthiness of len(...); these checks were probably meant to
    # be assert_equal(len(...), N). Same pattern repeats below.
    response = self.client.get('/jobbrowser/jobs/%s/tasks' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 4)
    # Select by tasktype
    response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 1)
    # Select by taskstate
    response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 4)
    # Select by text
    response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' % (hadoop_job_id,))
    assert_true(len(response.context['page'].object_list), 1)

    # Test job single logs page
    response = self.client.get('/jobbrowser/jobs/%s/single_logs' % (hadoop_job_id))
    assert_true('syslog' in response.content)
Example #22
0
  def test_failed_jobs(self):
    """
    Test jobs with genuine failure, not just killed
    """
    # Create design that will fail because the script file isn't there
    INPUT_DIR = self.home_dir + '/input'
    OUTPUT_DIR = self.home_dir + '/output'
    try:
        self.cluster.fs.mkdir(self.home_dir + "/jt-test_failed_jobs")
        self.cluster.fs.mkdir(INPUT_DIR)
        self.cluster.fs.rmtree(OUTPUT_DIR)
    except:
        # rmtree probably failed here.
        # NOTE(review): deliberate best-effort cleanup -- the bare except
        # swallows everything, which is acceptable only in this fixture step.
        pass
    # Map/combine/reduce classes point at non-existent "...lib.dne" classes,
    # so the reduce phase is expected to fail at runtime.
    response = self.client.post('/jobsub/new_design/mapreduce', {
        'wf-name': ['test_failed_jobs-1'],
        'wf-description': ['description test_failed_jobs-1'],
        'action-args': [''],
        'action-jar_path': ['/user/hue/jobsub/examples/hadoop-examples.jar'],
        'action-archives': ['[]'],
        'action-job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\
            {"name":"mapred.output.dir","value":"%s"},\
            {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
            {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)],
        'action-files': ['[]']}, follow=True)
    designs = json.loads(response.context['designs'])

    # Submit the job
    # The freshly-created design is first in the returned list.
    design_id = designs[0]['id']
    response = self.client.post("/jobsub/submit_design/%d" % design_id, follow=True)
    oozie_jobid = response.context['jobid']
    OozieServerProvider.wait_until_completion(oozie_jobid, timeout=500, step=1)
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # Select only killed jobs (should be absent)
    # Taking advantage of the fact new jobs are at the top of the list!
    response = self.client.get('/jobbrowser/jobs/?state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Select only failed jobs (should be present)
    # Map job should succeed. Reduce job should fail.
    response = self.client.get('/jobbrowser/jobs/?state=failed')
    assert_true(hadoop_job_id_short in response.content)

    # The single job view should have the failed task table
    response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
    html = response.content.lower()
    assert_true('failed task' in html)

    # The map task should say success (empty input)
    map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
    assert_true('succeed' in response.content)
    assert_true('failed' not in response.content)

    # The reduce task should say failed
    reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
    assert_true('succeed' not in response.content)
    assert_true('failed' in response.content)

    # Selecting by failed state should include the failed reduce only --
    # the map succeeded, so it must not appear in the failed-task listing.
    response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
    assert_true('r_000000' in response.content)
    assert_true('m_000000' not in response.content)