def setup_class(cls):
    """Class-level fixture: create the shared test user, install the Oozie
    examples, and run the sleep workflow once so every test in the class has
    a completed Hadoop job to inspect.
    """
    OozieServerProvider.setup_class()

    # Create the test user and its HDFS home directory.
    cls.username = '******'
    cls.home_dir = '/user/%s' % cls.username
    cls.cluster.fs.do_as_user(cls.username, cls.cluster.fs.create_home_dir, cls.home_dir)

    # Logged-in Django test client plus app permissions for the user.
    cls.client = make_logged_in_client(username=cls.username, is_superuser=False, groupname='test')
    cls.user = User.objects.get(username=cls.username)
    grant_access(cls.username, 'test', 'jobsub')
    grant_access(cls.username, 'test', 'jobbrowser')
    grant_access(cls.username, 'test', 'oozie')
    add_to_group(cls.username)

    # Remember the previous fs user so teardown can restore it; from here on
    # filesystem operations run as the test user.
    cls.prev_user = cls.cluster.fs.user
    cls.cluster.fs.setuser(cls.username)

    cls.install_examples()
    cls.design = cls.create_design()

    # Run the sleep example, since it doesn't require user home directory
    design_id = cls.design.id
    response = cls.client.post(
        reverse('oozie:submit_workflow', args=[design_id]),
        data={
            u'form-MAX_NUM_FORMS': [u''],
            u'form-INITIAL_FORMS': [u'1'],
            u'form-0-name': [u'REDUCER_SLEEP_TIME'],
            u'form-0-value': [u'1'],
            u'form-TOTAL_FORMS': [u'1'],
        },
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    OozieServerProvider.wait_until_completion(oozie_jobid)

    cls.hadoop_job_id = get_hadoop_job_id(cls.oozie, oozie_jobid, 1)
    cls.hadoop_job_id_short = views.get_shorter_id(cls.hadoop_job_id)
def setUp(self):
    """
    To clean: creating test1, test2, test3...users
    """
    # Each test gets a fresh numbered user so runs do not interfere.
    TestJobBrowserWithHadoop.user_count += 1
    self.username = '******' + str(TestJobBrowserWithHadoop.user_count)
    self.home_dir = '/user/%s' % self.username
    self.cluster.fs.do_as_user(self.username, self.cluster.fs.create_home_dir, self.home_dir)

    self.client = make_logged_in_client(username=self.username, is_superuser=False, groupname='test')
    self.user = User.objects.get(username=self.username)
    grant_access(self.username, 'test', 'jobsub')
    grant_access(self.username, 'test', 'jobbrowser')
    grant_access(self.username, 'test', 'oozie')
    add_to_group(self.username)

    # Remember the previous fs user so it can be restored later; filesystem
    # calls below run as the per-test user.
    self.prev_user = self.cluster.fs.user
    self.cluster.fs.setuser(self.username)

    self.install_examples()
    self.design = self.create_design()

    raise SkipTest  # NOTE(review): everything below is currently unreachable

    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(
        reverse('oozie:submit_workflow', args=[design_id]),
        data={
            u'form-MAX_NUM_FORMS': [u''],
            u'form-INITIAL_FORMS': [u'1'],
            u'form-0-name': [u'REDUCER_SLEEP_TIME'],
            u'form-0-value': [u'1'],
            u'form-TOTAL_FORMS': [u'1'],
        },
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)

    self.hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    self.hadoop_job_id_short = views.get_shorter_id(self.hadoop_job_id)
def setUp(self):
    """
    To clean: creating test1, test2, test3...users
    """
    # Use a fresh numbered user per test so state never leaks across tests.
    TestJobBrowserWithHadoop.user_count += 1
    self.username = '******' + str(TestJobBrowserWithHadoop.user_count)
    self.home_dir = '/user/%s' % self.username
    self.cluster.fs.do_as_user(self.username, self.cluster.fs.create_home_dir, self.home_dir)

    self.client = make_logged_in_client(username=self.username, is_superuser=False, groupname='test')
    self.user = User.objects.get(username=self.username)
    grant_access(self.username, 'test', 'jobsub')
    grant_access(self.username, 'test', 'jobbrowser')
    grant_access(self.username, 'test', 'oozie')
    add_to_group(self.username)

    # Keep the old fs user around for restoration, then act as the test user.
    self.prev_user = self.cluster.fs.user
    self.cluster.fs.setuser(self.username)

    self.install_examples()
    self.design = self.create_design()

    raise SkipTest  # NOTE(review): the workflow submission below never runs

    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(
        reverse('oozie:submit_workflow', args=[design_id]),
        data={u'form-MAX_NUM_FORMS': [u''],
              u'form-INITIAL_FORMS': [u'1'],
              u'form-0-name': [u'REDUCER_SLEEP_TIME'],
              u'form-0-value': [u'1'],
              u'form-TOTAL_FORMS': [u'1']},
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)

    self.hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
    self.hadoop_job_id_short = views.get_shorter_id(self.hadoop_job_id)
def setup_class(cls):
    """Create the shared test user, install the Oozie examples and run the
    sleep workflow once so the class has a finished Hadoop job to inspect.
    """
    OozieServerProvider.setup_class()

    cls.username = "******"
    cls.home_dir = "/user/%s" % cls.username
    cls.cluster.fs.do_as_user(cls.username, cls.cluster.fs.create_home_dir, cls.home_dir)

    cls.client = make_logged_in_client(username=cls.username, is_superuser=False, groupname="test")
    cls.user = User.objects.get(username=cls.username)
    grant_access(cls.username, "test", "jobsub")
    grant_access(cls.username, "test", "jobbrowser")
    grant_access(cls.username, "test", "oozie")
    add_to_group(cls.username)

    # Subsequent filesystem calls run as the test user; the previous fs user
    # is remembered so teardown can restore it.
    cls.prev_user = cls.cluster.fs.user
    cls.cluster.fs.setuser(cls.username)

    cls.install_examples()
    cls.design = cls.create_design()

    # Run the sleep example, since it doesn't require user home directory
    sleep_form = {
        u"form-MAX_NUM_FORMS": [u""],
        u"form-INITIAL_FORMS": [u"1"],
        u"form-0-name": [u"REDUCER_SLEEP_TIME"],
        u"form-0-value": [u"1"],
        u"form-TOTAL_FORMS": [u"1"],
    }
    response = cls.client.post(reverse("oozie:submit_workflow", args=[cls.design.id]), data=sleep_form, follow=True)
    oozie_jobid = response.context["oozie_workflow"].id
    OozieServerProvider.wait_until_completion(oozie_jobid)

    cls.hadoop_job_id = get_hadoop_job_id(cls.oozie, oozie_jobid, 1)
    cls.hadoop_job_id_short = views.get_shorter_id(cls.hadoop_job_id)
def test_failed_jobs(self):
    """
    Test jobs with genuine failure, not just killed.

    Submits a MapReduce design whose mapper/combiner/reducer classes do not
    exist, waits for the Oozie workflow to finish, and checks the job shows
    up under the 'failed' filter (and not under 'killed').
    """
    if is_live_cluster():
        raise SkipTest('HUE-2902: Skipping because test is not reentrant')

    # Create design that will fail because the script file isn't there
    INPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/input'
    OUTPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/output'
    try:
        TestJobBrowserWithHadoop.cluster.fs.mkdir(TestJobBrowserWithHadoop.home_dir + "/jt-test_failed_jobs")
        TestJobBrowserWithHadoop.cluster.fs.mkdir(INPUT_DIR)
        TestJobBrowserWithHadoop.cluster.fs.rmtree(OUTPUT_DIR)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception. The best-effort
        # cleanup-and-continue behavior is preserved.
        LOG.exception('failed to teardown tests')

    job_name = '%s_%s' % (TestJobBrowserWithHadoop.username, 'test_failed_jobs-1')
    response = TestJobBrowserWithHadoop.client.post(
        reverse('jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}),
        {
            'name': [job_name],
            'description': ['description test_failed_jobs-1'],
            'args': '',
            'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
            'prepares': '[]',
            'archives': '[]',
            'files': '[]',
            # The *.dne classes deliberately Do Not Exist, so the tasks fail.
            'job_properties': [
                '[{"name":"mapred.input.dir","value":"%s"},\
 {"name":"mapred.output.dir","value":"%s"},\
 {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)
            ]
        },
        HTTP_X_REQUESTED_WITH='XMLHttpRequest',
        follow=True)

    # Submit the job
    design_dict = json.loads(response.content)
    design_id = int(design_dict['id'])
    response = TestJobBrowserWithHadoop.client.post(
        reverse('oozie:submit_workflow', args=[design_id]),
        data={
            u'form-MAX_NUM_FORMS': [u''],
            u'form-INITIAL_FORMS': [u'1'],
            u'form-0-name': [u'REDUCER_SLEEP_TIME'],
            u'form-0-value': [u'1'],
            u'form-TOTAL_FORMS': [u'1'],
        },
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    # Return value was bound to an unused local `job`; dropped.
    OozieServerProvider.wait_until_completion(oozie_jobid)
    hadoop_job_id = get_hadoop_job_id(TestJobBrowserWithHadoop.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # Select only killed jobs (should be absent)
    # Taking advantage of the fact new jobs are at the top of the list!
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/?format=json&state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Select only failed jobs (should be present)
    # Map job should succeed. Reduce job should fail.
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/?format=json&state=failed')
    assert_true(hadoop_job_id_short in response.content)

    raise SkipTest  # Not compatible with MR2

    # The single job view should have the failed task table
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
    html = response.content.lower()
    assert_true('failed task' in html, html)

    # The map task should say success (empty input)
    # Fixed: previously built from the class attribute
    # TestJobBrowserWithHadoop.hadoop_job_id (the job submitted in
    # setup_class), but this test inspects the job it just submitted —
    # use the local id, consistent with the reduce-task check below.
    map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
    assert_true('succeed' in response.content)
    assert_true('failed' not in response.content)

    # The reduce task should say failed
    reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
    assert_true('succeed' not in response.content)
    assert_true('failed' in response.content)

    # Selecting by failed state should include the failed map
    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
    assert_true('r_000000' in response.content)
    assert_true('m_000000' not in response.content)
def test_failed_jobs(self):
    """
    Test jobs with genuine failure, not just killed
    """
    if is_live_cluster():
        raise SkipTest('HUE-2902: Skipping because test is not reentrant')

    # Shorthand for the (class-level) shared fixtures.
    T = TestJobBrowserWithHadoop

    # Create design that will fail because the script file isn't there
    INPUT_DIR = T.home_dir + '/input'
    OUTPUT_DIR = T.home_dir + '/output'
    try:
        T.cluster.fs.mkdir(T.home_dir + "/jt-test_failed_jobs")
        T.cluster.fs.mkdir(INPUT_DIR)
        T.cluster.fs.rmtree(OUTPUT_DIR)
    except:
        LOG.exception('failed to teardown tests')

    job_name = '%s_%s' % (T.username, 'test_failed_jobs-1')
    response = T.client.post(
        reverse('jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}),
        {'name': [job_name],
         'description': ['description test_failed_jobs-1'],
         'args': '',
         'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
         'prepares': '[]',
         'archives': '[]',
         'files': '[]',
         'job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\
 {"name":"mapred.output.dir","value":"%s"},\
 {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)]},
        HTTP_X_REQUESTED_WITH='XMLHttpRequest',
        follow=True)

    # Submit the job
    design_dict = json.loads(response.content)
    design_id = int(design_dict['id'])
    response = T.client.post(
        reverse('oozie:submit_workflow', args=[design_id]),
        data={u'form-MAX_NUM_FORMS': [u''],
              u'form-INITIAL_FORMS': [u'1'],
              u'form-0-name': [u'REDUCER_SLEEP_TIME'],
              u'form-0-value': [u'1'],
              u'form-TOTAL_FORMS': [u'1']},
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id
    job = OozieServerProvider.wait_until_completion(oozie_jobid)
    hadoop_job_id = get_hadoop_job_id(T.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # Select only killed jobs (should be absent)
    # Taking advantage of the fact new jobs are at the top of the list!
    response = T.client.get('/jobbrowser/jobs/?format=json&state=killed')
    assert_false(hadoop_job_id_short in response.content)

    # Select only failed jobs (should be present)
    # Map job should succeed. Reduce job should fail.
    response = T.client.get('/jobbrowser/jobs/?format=json&state=failed')
    assert_true(hadoop_job_id_short in response.content)

    raise SkipTest  # Not compatible with MR2

    # The single job view should have the failed task table
    response = T.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
    html = response.content.lower()
    assert_true('failed task' in html, html)

    # The map task should say success (empty input)
    map_task_id = T.hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = T.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
    assert_true('succeed' in response.content)
    assert_true('failed' not in response.content)

    # The reduce task should say failed
    reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = T.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
    assert_true('succeed' not in response.content)
    assert_true('failed' in response.content)

    # Selecting by failed state should include the failed map
    response = T.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
    assert_true('r_000000' in response.content)
    assert_true('m_000000' not in response.content)
# NOTE(review): this block is whitespace-mangled and contains '******'-masked
# spans (secret-scrubbed concatenations around '/jobbrowser/jobs/?user='), so it
# is not valid Python as-is and cannot be safely reformatted — kept byte-identical.
# Visible intent: submit the sleep workflow, wait for completion, verify the job
# appears only under the 'completed' state filter, then (presumably) exercise
# SHARE_JOBS sharing permissions for another user — TODO confirm against VCS history.
def test_job(self): """ Test new job views. The status of the jobs should be the same as the status reported back by oozie. In this case, all jobs should succeed. """ # Run the sleep example, since it doesn't require user home directory design_id = self.design.id response = self.client.post(reverse('oozie:submit_workflow', args=[design_id]), data={ u'form-MAX_NUM_FORMS': [u''], u'form-INITIAL_FORMS': [u'1'], u'form-0-name': [u'REDUCER_SLEEP_TIME'], u'form-0-value': [u'1'], u'form-TOTAL_FORMS': [u'1'] }, follow=True) oozie_jobid = response.context['oozie_workflow'].id OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1) hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1) hadoop_job_id_short = views.get_shorter_id(hadoop_job_id) # All jobs page and fetch job ID # Taking advantage of the fact new jobs are at the top of the list! response = self.client.get('/jobbrowser/jobs/') assert_true(hadoop_job_id_short in response.content, response.content) # Make sure job succeeded response = self.client.get('/jobbrowser/jobs/?state=completed') assert_true(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=failed') assert_false(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=running') assert_false(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=killed') assert_false(hadoop_job_id_short in response.content) # Check sharing permissions # Login as ourself finish = SHARE_JOBS.set_for_testing(True) try: response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me', is_superuser=False, groupname='test') grant_access("not_me", "test", "jobbrowser") finish = SHARE_JOBS.set_for_testing(True) try: response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id) # Check some counters for single job. 
counters = response.context['job'].counters counters_file_bytes_written = counters[ 'org.apache.hadoop.mapreduce.FileSystemCounter']['counters'][ 'FILE_BYTES_WRITTEN'] assert_true(counters_file_bytes_written['map'] > 0) assert_true(counters_file_bytes_written['reduce'] > 0) # We can't just check the complete contents of the python map because the # SLOTS_MILLIS_* entries have a variable number of milliseconds from # run-to-run. assert_equal( response.context['job']. counters['org.apache.hadoop.mapreduce.JobCounter']['counters'] ['TOTAL_LAUNCHED_MAPS']['total'], 2L) assert_equal( response.context['job']. counters['org.apache.hadoop.mapreduce.JobCounter']['counters'] ['TOTAL_LAUNCHED_REDUCES']['total'], 1L) assert_equal( response.context['job']. counters['org.apache.hadoop.mapreduce.JobCounter']['counters'] ['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0L) assert_equal( response.context['job']. counters['org.apache.hadoop.mapreduce.JobCounter']['counters'] ['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0L) assert_true(response.context['job']. counters['org.apache.hadoop.mapreduce.JobCounter'] ['counters']['SLOTS_MILLIS_MAPS']['total'] > 0) assert_true(response.context['job']. 
counters['org.apache.hadoop.mapreduce.JobCounter'] ['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0) # There should be 4 tasks for this job: cleanup, setup, map, reduce response = self.client.get('/jobbrowser/jobs/%s/tasks' % (hadoop_job_id, )) assert_true(len(response.context['page'].object_list), 4) # Select by tasktype response = self.client.get( '/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id, )) assert_true(len(response.context['page'].object_list), 1) # Select by taskstate response = self.client.get( '/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id, )) assert_true(len(response.context['page'].object_list), 4) # Select by text response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' % (hadoop_job_id, )) assert_true(len(response.context['page'].object_list), 1) # Test job single logs page response = self.client.get('/jobbrowser/jobs/%s/single_logs' % (hadoop_job_id)) assert_true('syslog' in response.content) assert_true( '<div class="tab-pane active" id="logsSysLog">' in response.content or '<div class="tab-pane active" id="logsStdErr">' in response.content or # Depending on Hadoop '<div class="tab-pane active" id="logsStdOut">' in response.content, # For jenkins response.content)
def test_kill_job(self):
    """
    Test job in kill state.
    """
    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(
        reverse('oozie:submit_workflow', args=[self.design.id]),
        data={
            u'form-MAX_NUM_FORMS': [u''],
            u'form-INITIAL_FORMS': [u'1'],
            u'form-0-name': [u'REDUCER_SLEEP_TIME'],
            u'form-0-value': [u'1'],
            u'form-TOTAL_FORMS': [u'1'],
        },
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id

    # Wait for a job to be created and fetch job ID
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)

    # A different non-superuser must not be allowed to kill our job.
    client2 = make_logged_in_client('test_non_superuser', is_superuser=False, groupname='test')
    grant_access('test_non_superuser', 'test', 'jobbrowser')
    response = client2.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))
    assert_equal(
        "Permission denied. User test_non_superuser cannot delete user %s's job." % self.username,
        response.context["error"])

    # Make sure that the first map task succeeds before moving on
    # This will keep us from hitting timing-related failures
    first_mapper = 'm_000000'
    start = time.time()
    timeout_sec = 60
    while first_mapper not in \
            self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,)).content:
        time.sleep(1)
        # If this assert fails, something has probably really failed
        assert_true(time.time() - start < timeout_sec, "Timed out waiting for first mapper to complete")

    # Kill task
    self.client.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))

    # It should say killed at some point
    response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
    html = response.content.lower()
    i = 0
    while 'killed' not in html and i < 10:
        time.sleep(5)
        response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
        html = response.content.lower()
        i += 1

    assert_true(views.get_shorter_id(hadoop_job_id) in html)
    assert_true('killed' in html, html)

    # Exercise select by taskstate
    self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
    self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,))
    self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=running' % (hadoop_job_id,))
    self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=killed' % (hadoop_job_id,))

    # Test single task page
    late_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, late_task_id))
    assert_false('succeed' in response.content)
    assert_true('killed' in response.content)

    # The first task should've succeeded
    # We use a different method of checking success for this one
    early_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, early_task_id))
    assert_true('succeed' in response.content)
    assert_false('failed' in response.content)

    # Test single attempt page
    early_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    attempt_id = early_task_id.replace('task', 'attempt') + '_0'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s/attempts/%s/logs' % (hadoop_job_id, early_task_id, attempt_id))
    assert_true('syslog' in response.content)

    # Test dock jobs
    response = self.client.get('/jobbrowser/dock_jobs/')
    assert_false('completed' in response.content)
    assert_false('failed' in response.content)
# NOTE(review): whitespace-mangled block with '******'-masked spans (scrubbed
# concatenations around '/jobbrowser/jobs/?user='); not valid Python as-is, so
# it is kept byte-identical rather than reformatted. Visible intent: run the
# sleep workflow, check state filters (completed/failed/running/killed), then
# (presumably) verify SHARE_JOBS-based sharing with a 'not_me' user and inspect
# per-job counters — TODO confirm against VCS history.
def test_job(self): """ Test new job views. The status of the jobs should be the same as the status reported back by oozie. In this case, all jobs should succeed. """ # Run the sleep example, since it doesn't require user home directory design_id = self.design.id response = self.client.post(reverse('oozie:submit_workflow', args=[design_id]), data={u'form-MAX_NUM_FORMS': [u''], u'form-INITIAL_FORMS': [u'1'], u'form-0-name': [u'REDUCER_SLEEP_TIME'], u'form-0-value': [u'1'], u'form-TOTAL_FORMS': [u'1']}, follow=True) oozie_jobid = response.context['oozie_workflow'].id OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1) hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1) hadoop_job_id_short = views.get_shorter_id(hadoop_job_id) # All jobs page and fetch job ID # Taking advantage of the fact new jobs are at the top of the list! response = self.client.get('/jobbrowser/jobs/') assert_true(hadoop_job_id_short in response.content, response.content) # Make sure job succeeded response = self.client.get('/jobbrowser/jobs/?state=completed') assert_true(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=failed') assert_false(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=running') assert_false(hadoop_job_id_short in response.content) response = self.client.get('/jobbrowser/jobs/?state=killed') assert_false(hadoop_job_id_short in response.content) # Check sharing permissions # Login as ourself finish = SHARE_JOBS.set_for_testing(True) try: response = self.client.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'not_me', is_superuser=False, groupname='test') grant_access("not_me", "test", "jobbrowser") finish = SHARE_JOBS.set_for_testing(True) try: response = client_not_me.get('/jobbrowser/jobs/?user='******'/jobbrowser/jobs/?user='******'/jobbrowser/jobs/%s' % hadoop_job_id) # Check some counters for single job. 
counters = response.context['job'].counters counters_file_bytes_written = counters['org.apache.hadoop.mapreduce.FileSystemCounter']['counters']['FILE_BYTES_WRITTEN'] assert_true(counters_file_bytes_written['map'] > 0) assert_true(counters_file_bytes_written['reduce'] > 0) # We can't just check the complete contents of the python map because the # SLOTS_MILLIS_* entries have a variable number of milliseconds from # run-to-run. assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_MAPS']['total'], 2L) assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['TOTAL_LAUNCHED_REDUCES']['total'], 1L) assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_MAPS']['total'], 0L) assert_equal(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['FALLOW_SLOTS_MILLIS_REDUCES']['total'], 0L) assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_MAPS']['total'] > 0) assert_true(response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']['SLOTS_MILLIS_REDUCES']['total'] > 0) # There should be 4 tasks for this job: cleanup, setup, map, reduce response = self.client.get('/jobbrowser/jobs/%s/tasks' % (hadoop_job_id,)) assert_true(len(response.context['page'].object_list), 4) # Select by tasktype response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id,)) assert_true(len(response.context['page'].object_list), 1) # Select by taskstate response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,)) assert_true(len(response.context['page'].object_list), 4) # Select by text response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' % (hadoop_job_id,)) assert_true(len(response.context['page'].object_list), 1) # Test job single logs 
page response = self.client.get('/jobbrowser/jobs/%s/single_logs' % (hadoop_job_id)) assert_true('syslog' in response.content) assert_true('<div class="tab-pane active" id="logsSysLog">' in response.content or '<div class="tab-pane active" id="logsStdErr">' in response.content or # Depending on Hadoop '<div class="tab-pane active" id="logsStdOut">' in response.content, # For jenkins response.content)
def test_kill_job(self):
    """
    Test job in kill state.
    """
    # Run the sleep example, since it doesn't require user home directory
    design_id = self.design.id
    response = self.client.post(
        reverse('oozie:submit_workflow', args=[self.design.id]),
        data={u'form-MAX_NUM_FORMS': [u''],
              u'form-INITIAL_FORMS': [u'1'],
              u'form-0-name': [u'REDUCER_SLEEP_TIME'],
              u'form-0-value': [u'1'],
              u'form-TOTAL_FORMS': [u'1']},
        follow=True)
    oozie_jobid = response.context['oozie_workflow'].id

    # Wait for a job to be created and fetch job ID
    hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)

    # A non-superuser who does not own the job must be denied the kill.
    client2 = make_logged_in_client('test_non_superuser', is_superuser=False, groupname='test')
    grant_access('test_non_superuser', 'test', 'jobbrowser')
    response = client2.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))
    expected_error = "Permission denied. User test_non_superuser cannot delete user %s's job." % self.username
    assert_equal(expected_error, response.context["error"])

    # Make sure that the first map task succeeds before moving on
    # This will keep us from hitting timing-related failures
    first_mapper = 'm_000000'
    start = time.time()
    timeout_sec = 60
    succeeded_url = '/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,)
    while first_mapper not in self.client.get(succeeded_url).content:
        time.sleep(1)
        # If this assert fails, something has probably really failed
        assert_true(time.time() - start < timeout_sec, "Timed out waiting for first mapper to complete")

    # Kill task
    self.client.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))

    # It should say killed at some point
    job_url = '/jobbrowser/jobs/%s' % (hadoop_job_id,)
    response = self.client.get(job_url)
    html = response.content.lower()
    attempts = 0
    while 'killed' not in html and attempts < 10:
        time.sleep(5)
        response = self.client.get(job_url)
        html = response.content.lower()
        attempts += 1
    assert_true(views.get_shorter_id(hadoop_job_id) in html)
    assert_true('killed' in html, html)

    # Exercise select by taskstate
    for state in ('failed', 'succeeded', 'running', 'killed'):
        self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=%s' % (hadoop_job_id, state))

    # Test single task page
    late_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, late_task_id))
    assert_false('succeed' in response.content)
    assert_true('killed' in response.content)

    # The first task should've succeeded
    # We use a different method of checking success for this one
    early_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, early_task_id))
    assert_true('succeed' in response.content)
    assert_false('failed' in response.content)

    # Test single attempt page
    attempt_id = early_task_id.replace('task', 'attempt') + '_0'
    response = self.client.get('/jobbrowser/jobs/%s/tasks/%s/attempts/%s/logs' % (hadoop_job_id, early_task_id, attempt_id))
    assert_true('syslog' in response.content)

    # Test dock jobs
    response = self.client.get('/jobbrowser/dock_jobs/')
    assert_false('completed' in response.content)
    assert_false('failed' in response.content)
def test_failed_jobs(self):
    """
    Test jobs with genuine failure, not just killed
    """
    if is_live_cluster():
        raise SkipTest("HUE-2902: Skipping because test is not reentrant")

    # Create design that will fail because the script file isn't there
    home = TestJobBrowserWithHadoop.home_dir
    INPUT_DIR = home + "/input"
    OUTPUT_DIR = home + "/output"
    try:
        TestJobBrowserWithHadoop.cluster.fs.mkdir(home + "/jt-test_failed_jobs")
        TestJobBrowserWithHadoop.cluster.fs.mkdir(INPUT_DIR)
        TestJobBrowserWithHadoop.cluster.fs.rmtree(OUTPUT_DIR)
    except:
        LOG.exception("failed to teardown tests")

    job_name = "%s_%s" % (TestJobBrowserWithHadoop.username, "test_failed_jobs-1")
    design_payload = {
        "name": [job_name],
        "description": ["description test_failed_jobs-1"],
        "args": "",
        "jar_path": "/user/hue/oozie/workspaces/lib/hadoop-examples.jar",
        "prepares": "[]",
        "archives": "[]",
        "files": "[]",
        "job_properties": [
            '[{"name":"mapred.input.dir","value":"%s"},\
 {"name":"mapred.output.dir","value":"%s"},\
 {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
 {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)
        ],
    }
    response = TestJobBrowserWithHadoop.client.post(
        reverse("jobsub.views.new_design", kwargs={"node_type": "mapreduce"}),
        design_payload,
        HTTP_X_REQUESTED_WITH="XMLHttpRequest",
        follow=True,
    )

    # Submit the job
    design_dict = json.loads(response.content)
    design_id = int(design_dict["id"])
    response = TestJobBrowserWithHadoop.client.post(
        reverse("oozie:submit_workflow", args=[design_id]),
        data={
            u"form-MAX_NUM_FORMS": [u""],
            u"form-INITIAL_FORMS": [u"1"],
            u"form-0-name": [u"REDUCER_SLEEP_TIME"],
            u"form-0-value": [u"1"],
            u"form-TOTAL_FORMS": [u"1"],
        },
        follow=True,
    )
    oozie_jobid = response.context["oozie_workflow"].id
    job = OozieServerProvider.wait_until_completion(oozie_jobid)
    hadoop_job_id = get_hadoop_job_id(TestJobBrowserWithHadoop.oozie, oozie_jobid, 1)
    hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

    # Select only killed jobs (should be absent)
    # Taking advantage of the fact new jobs are at the top of the list!
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/?format=json&state=killed")
    assert_false(hadoop_job_id_short in response.content)

    # Select only failed jobs (should be present)
    # Map job should succeed. Reduce job should fail.
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/?format=json&state=failed")
    assert_true(hadoop_job_id_short in response.content)

    raise SkipTest  # Not compatible with MR2

    # The single job view should have the failed task table
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/%s" % (hadoop_job_id,))
    html = response.content.lower()
    assert_true("failed task" in html, html)

    # The map task should say success (empty input)
    map_task_id = TestJobBrowserWithHadoop.hadoop_job_id.replace("job", "task") + "_m_000000"
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/%s/tasks/%s" % (hadoop_job_id, map_task_id))
    assert_true("succeed" in response.content)
    assert_true("failed" not in response.content)

    # The reduce task should say failed
    reduce_task_id = hadoop_job_id.replace("job", "task") + "_r_000000"
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/%s/tasks/%s" % (hadoop_job_id, reduce_task_id))
    assert_true("succeed" not in response.content)
    assert_true("failed" in response.content)

    # Selecting by failed state should include the failed map
    response = TestJobBrowserWithHadoop.client.get("/jobbrowser/jobs/%s/tasks?taskstate=failed" % (hadoop_job_id,))
    assert_true("r_000000" in response.content)
    assert_true("m_000000" not in response.content)
def test_job(self):
  """
  Test new job views.

  The status of the jobs should be the same as the status reported back
  by oozie. In this case, all jobs should succeed.
  """
  # Clone design
  assert_equal(0, OozieDesign.objects.filter(owner__username=self.username).count())
  self.client.post('/jobsub/clone_design/%d' % self.sleep_design_id)
  assert_equal(1, OozieDesign.objects.filter(owner__username=self.username).count())

  # Run the sleep example, since it doesn't require user home directory
  design_id = OozieDesign.objects.get(owner__username=self.username).id
  response = self.client.post("/jobsub/submit_design/%d" % (design_id,),
    dict(map_sleep_time=1, num_maps=1, num_reduces=1, reduce_sleep_time=1),
    follow=True)
  oozie_jobid = response.context['jobid']
  OozieServerProvider.wait_until_completion(oozie_jobid, timeout=120, step=1)  # return value was unused
  hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
  hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

  # All jobs page and fetch job ID
  # Taking advantage of the fact new jobs are at the top of the list!
  response = self.client.get('/jobbrowser/jobs/')
  assert_true(hadoop_job_id_short in response.content)

  # Make sure job succeeded
  response = self.client.get('/jobbrowser/jobs/?state=completed')
  assert_true(hadoop_job_id_short in response.content)
  response = self.client.get('/jobbrowser/jobs/?state=failed')
  assert_false(hadoop_job_id_short in response.content)
  response = self.client.get('/jobbrowser/jobs/?state=running')
  assert_false(hadoop_job_id_short in response.content)
  response = self.client.get('/jobbrowser/jobs/?state=killed')
  assert_false(hadoop_job_id_short in response.content)

  # Check sharing permissions
  # NOTE(review): the bodies of these four SHARE_JOBS sections were redacted
  # in this copy of the file; restored from the upstream Hue jobbrowser test
  # suite -- confirm against repository history.
  # Login as ourself
  finish = SHARE_JOBS.set_for_testing(True)
  try:
    response = self.client.get('/jobbrowser/jobs/?user=')
    assert_true(hadoop_job_id_short in response.content)
  finally:
    finish()

  finish = SHARE_JOBS.set_for_testing(False)
  try:
    response = self.client.get('/jobbrowser/jobs/?user=')
    assert_true(hadoop_job_id_short in response.content)
  finally:
    finish()

  # Login as someone else
  client_not_me = make_logged_in_client('not_me', is_superuser=False, groupname='test')
  grant_access("not_me", "test", "jobbrowser")
  finish = SHARE_JOBS.set_for_testing(True)
  try:
    response = client_not_me.get('/jobbrowser/jobs/?user=')
    assert_true(hadoop_job_id_short in response.content)
  finally:
    finish()

  finish = SHARE_JOBS.set_for_testing(False)
  try:
    response = client_not_me.get('/jobbrowser/jobs/?user=')
    assert_false(hadoop_job_id_short in response.content)
  finally:
    finish()

  # Single job page
  response = self.client.get('/jobbrowser/jobs/%s' % hadoop_job_id)

  # Check some counters for single job.
  counters = response.context['job'].counters
  counters_file_bytes_written = counters['org.apache.hadoop.mapreduce.FileSystemCounter']['counters']['FILE_BYTES_WRITTEN']
  assert_true(counters_file_bytes_written['map'] > 0)
  assert_true(counters_file_bytes_written['reduce'] > 0)

  # We can't just check the complete contents of the python map because the
  # SLOTS_MILLIS_* entries have a variable number of milliseconds from
  # run-to-run.
  job_counters = response.context['job'].counters['org.apache.hadoop.mapreduce.JobCounter']['counters']
  assert_equal(1, job_counters['TOTAL_LAUNCHED_MAPS']['total'])
  assert_equal(1, job_counters['TOTAL_LAUNCHED_REDUCES']['total'])
  assert_equal(0, job_counters['FALLOW_SLOTS_MILLIS_MAPS']['total'])
  assert_equal(0, job_counters['FALLOW_SLOTS_MILLIS_REDUCES']['total'])
  assert_true(job_counters['SLOTS_MILLIS_MAPS']['total'] > 0)
  assert_true(job_counters['SLOTS_MILLIS_REDUCES']['total'] > 0)

  # There should be 4 tasks for this job: cleanup, setup, map, reduce
  # Fixed: these four checks used assert_true(len(...), n), where the second
  # argument is nose's failure *message*, not an expected value -- they only
  # verified the list was non-empty. assert_equal pins the actual counts.
  response = self.client.get('/jobbrowser/jobs/%s/tasks' % (hadoop_job_id,))
  assert_equal(4, len(response.context['page'].object_list))

  # Select by tasktype
  response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktype=reduce' % (hadoop_job_id,))
  assert_equal(1, len(response.context['page'].object_list))

  # Select by taskstate
  response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,))
  assert_equal(4, len(response.context['page'].object_list))

  # Select by text
  response = self.client.get('/jobbrowser/jobs/%s/tasks?tasktext=clean' % (hadoop_job_id,))
  assert_equal(1, len(response.context['page'].object_list))

  # Test job single logs page
  response = self.client.get('/jobbrowser/jobs/%s/single_logs' % (hadoop_job_id))
  assert_true('syslog' in response.content)
def test_kill_job(self):
  """
  Test job in kill state.
  """
  # Clone the sleep design so this test user owns a private copy of it.
  assert_equal(0, OozieDesign.objects.filter(owner__username=self.username).count())
  self.client.post('/jobsub/clone_design/%d' % self.sleep_design_id)
  assert_equal(1, OozieDesign.objects.filter(owner__username=self.username).count())

  # Run the sleep example, since it doesn't require user home directory
  design_id = OozieDesign.objects.get(owner__username=self.username).id
  resp = self.client.post(
      "/jobsub/submit_design/%d" % (design_id,),
      dict(map_sleep_time=1, num_maps=1, num_reduces=1, reduce_sleep_time=1),
      follow=True)
  oozie_jobid = resp.context['jobid']

  # Wait for a job to be created and fetch job ID
  hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)

  # A non-superuser who does not own the job must be refused the kill.
  other_client = make_logged_in_client('test_non_superuser', is_superuser=False, groupname='test')
  grant_access('test_non_superuser', 'test', 'jobbrowser')
  resp = other_client.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))
  assert_equal(
      "Permission denied. User test_non_superuser cannot delete user %s's job." % self.username,
      resp.context["error"])

  # Make sure that the first map task succeeds before moving on
  # This will keep us from hitting timing-related failures
  succeeded_url = '/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,)
  deadline = time.time() + 60
  while 'm_000000' not in self.client.get(succeeded_url).content:
    time.sleep(1)
    # If this assert fails, something has probably really failed
    assert_true(time.time() < deadline, "Timed out waiting for first mapper to complete")

  # Kill the job as its owner.
  self.client.post('/jobbrowser/jobs/%s/kill' % (hadoop_job_id,))

  # The job page should now report it as killed.
  resp = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
  page = resp.content.lower()
  assert_true(views.get_shorter_id(hadoop_job_id) in page)
  assert_true('killed' in page)

  # Exercise select by taskstate
  self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
  self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=succeeded' % (hadoop_job_id,))
  self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=running' % (hadoop_job_id,))
  self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=killed' % (hadoop_job_id,))

  # Single task page: the reduce task started late and should read as killed.
  killed_reduce_task = hadoop_job_id.replace('job', 'task') + '_r_000000'
  resp = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, killed_reduce_task))
  assert_false('succeed' in resp.content)
  assert_true('killed' in resp.content)

  # The first task should've succeeded
  # We use a different method of checking success for this one
  finished_map_task = hadoop_job_id.replace('job', 'task') + '_m_000000'
  resp = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, finished_map_task))
  assert_true('succeed' in resp.content)
  assert_false('failed' in resp.content)

  # Single attempt page for the successful map's first attempt.
  first_attempt = finished_map_task.replace('task', 'attempt') + '_0'
  resp = self.client.get('/jobbrowser/jobs/%s/tasks/%s/attempts/%s/logs' % (hadoop_job_id, finished_map_task, first_attempt))
  assert_true('syslog' in resp.content)

  # Test dock jobs
  resp = self.client.get('/jobbrowser/dock_jobs/')
  assert_false('completed' in resp.content)
  assert_false('failed' in resp.content)
def test_failed_jobs(self):
  """
  Test jobs with genuine failure, not just killed.

  Submits a MapReduce design whose mapper/combiner/reducer classes do not
  exist; the map phase succeeds on empty input while the reduce fails, so
  the job should surface under the 'failed' filter.
  """
  # Create design that will fail because the script file isn't there
  INPUT_DIR = self.home_dir + '/input'
  OUTPUT_DIR = self.home_dir + '/output'
  try:
    self.cluster.fs.mkdir(self.home_dir + "/jt-test_failed_jobs")
    self.cluster.fs.mkdir(INPUT_DIR)
    self.cluster.fs.rmtree(OUTPUT_DIR)
  except Exception:
    # rmtree probably failed here (OUTPUT_DIR may not exist yet); this is
    # deliberate best-effort setup. Narrowed from a bare `except:` so
    # SystemExit/KeyboardInterrupt are no longer swallowed.
    pass

  response = self.client.post('/jobsub/new_design/mapreduce', {
      'wf-name': ['test_failed_jobs-1'],
      'wf-description': ['description test_failed_jobs-1'],
      'action-args': [''],
      'action-jar_path': ['/user/hue/jobsub/examples/hadoop-examples.jar'],
      'action-archives': ['[]'],
      'action-job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\
          {"name":"mapred.output.dir","value":"%s"},\
          {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
          {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
          {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)],
      'action-files': ['[]']
    }, follow=True)
  designs = json.loads(response.context['designs'])

  # Submit the job
  design_id = designs[0]['id']
  response = self.client.post("/jobsub/submit_design/%d" % design_id, follow=True)
  oozie_jobid = response.context['jobid']
  OozieServerProvider.wait_until_completion(oozie_jobid, timeout=500, step=1)
  hadoop_job_id = get_hadoop_job_id(self.oozie, oozie_jobid, 1)
  hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

  # Select only killed jobs (should be absent)
  # Taking advantage of the fact new jobs are at the top of the list!
  response = self.client.get('/jobbrowser/jobs/?state=killed')
  assert_false(hadoop_job_id_short in response.content)

  # Select only failed jobs (should be present)
  # Map job should succeed. Reduce job should fail.
  response = self.client.get('/jobbrowser/jobs/?state=failed')
  assert_true(hadoop_job_id_short in response.content)

  # The single job view should have the failed task table
  response = self.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
  html = response.content.lower()
  assert_true('failed task' in html)

  # The map task should say success (empty input)
  map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
  response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
  assert_true('succeed' in response.content)
  assert_true('failed' not in response.content)

  # The reduce task should say failed
  reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
  response = self.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
  assert_true('succeed' not in response.content)
  assert_true('failed' in response.content)

  # Selecting by failed state should include the failed map
  response = self.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
  assert_true('r_000000' in response.content)
  assert_true('m_000000' not in response.content)