def job_not_assigned(request, jobid, path):
    """Poll a job on behalf of a ``format=json`` request and record the outcome.

    A ``result`` dict is filled in: status 0 when the job can be fetched,
    1 when ``ApplicationNotRunning`` is raised, and -1 together with an
    error message for any other failure.
    """
    if request.GET.get('format') == 'json':
        result = {'status': -1, 'message': ''}

        try:
            get_api(request.user, request.jt).get_job(jobid=jobid)
            result['status'] = 0
        except ApplicationNotRunning:
            # The application exists but is not running yet.
            result['status'] = 1
        except Exception as e:
            result['message'] = _('Error polling job %s: %s') % (jobid, e)
def job_not_assigned(request, jobid, path):
    """Poll a job on behalf of a ``format=json`` request and record the outcome.

    A ``result`` dict is filled in: status 0 when the job can be fetched,
    1 when ``ApplicationNotRunning`` is raised, and -1 together with an
    error message for any other failure.
    """
    if request.GET.get("format") == "json":
        result = {"status": -1, "message": ""}

        try:
            get_api(request.user, request.jt).get_job(jobid=jobid)
            result["status"] = 0
        except ApplicationNotRunning:
            # The application exists but is not running yet.
            result["status"] = 1
        except Exception as e:
            result["message"] = _("Error polling job %s: %s") % (jobid, e)
def single_tracker(request, trackerid):
    """Look up a tracker by id, surfacing any failure as a PopupException.

    Raises:
        PopupException: when ``get_tracker`` fails for any reason; the
            original exception is attached as ``detail``.
    """
    jt = get_api(request.user, request.jt)

    try:
        tracker = jt.get_tracker(trackerid)
    except Exception as e:
        raise PopupException(_('The tracker could not be contacted.'), detail=e)
def jobs(request):
    """Return the filtered job list as JSON for ``format=json`` POST requests.

    Filters (user, state, text, retired and the time window) are read from
    the POST data, and at most 1000 jobs are requested.

    Raises:
        PopupException: when the job list cannot be fetched; a dedicated
            message is used when the Resource Manager or Job Tracker
            appears to be unreachable.
    """
    user = request.POST.get('user', request.user.username)
    state = request.POST.get('state')
    text = request.POST.get('text')
    retired = request.POST.get('retired')
    time_value = request.POST.get('time_value', 7)
    time_unit = request.POST.get('time_unit', 'days')

    if request.POST.get('format') == 'json':
        try:
            # Limit number of jobs to be 1000
            jobs = get_api(request.user, request.jt).get_jobs(
                user=request.user, username=user, state=state, text=text,
                retired=retired, limit=1000, time_value=int(time_value),
                time_unit=time_unit)
        except Exception as ex:
            # The backend only exposes the failure reason through the message text.
            ex_message = str(ex)
            if 'Connection refused' in ex_message or 'standby RM' in ex_message:
                raise PopupException(_('Resource Manager cannot be contacted or might be down.'))
            elif 'Could not connect to' in ex_message:
                raise PopupException(_('Job Tracker cannot be contacted or might be down.'))
            else:
                raise PopupException(ex)

        json_jobs = {
            'jobs': [massage_job_for_json(job, request) for job in jobs],
        }
        return JsonResponse(json_jobs, encoder=JSONEncoderForHTML)
def job_attempt_logs_json(request, job, attempt_index=0, name='syslog', offset=LOG_OFFSET_BYTES, is_embeddable=False):
    """For async log retrieval as Yarn servers are very slow.

    Works out which log link applies to the job's application: the selected
    job attempt's link for a finished MapReduce application, or the
    application master container logs otherwise.

    Raises:
        KeyError: when an expected key is missing or the REST call fails.
    """
    log_link = None
    response = {'status': -1}

    try:
        jt = get_api(request.user, request.jt)
        app = jt.get_application(job.jobId)

        if app['applicationType'] == 'MAPREDUCE':
            if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
                attempt_index = int(attempt_index)
                if not job.job_attempts['jobAttempt']:
                    response = {'status': 0, 'log': _('Job has no tasks')}
                else:
                    attempt = job.job_attempts['jobAttempt'][attempt_index]
                    log_link = attempt['logsLink']
                    # Reformat log link to use YARN RM, replace node addr with node ID addr
                    log_link = log_link.replace(attempt['nodeHttpAddress'], attempt['nodeId'])
            elif app['state'] == 'RUNNING':
                log_link = app['amContainerLogs']
        elif app.get('amContainerLogs'):
            log_link = app.get('amContainerLogs')
    except (KeyError, RestException) as e:
        raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)
def jobs(request):
    """Return the filtered job list as JSON for ``format=json`` POST requests.

    Filters (user, state, text, retired) are read from the POST data, and at
    most 1000 jobs are requested.

    Raises:
        PopupException: when the job list cannot be fetched; a dedicated
            message is used when the Resource Manager or Job Tracker
            appears to be unreachable.
    """
    user = request.POST.get('user', request.user.username)
    state = request.POST.get('state')
    text = request.POST.get('text')
    retired = request.POST.get('retired')

    if request.POST.get('format') == 'json':
        try:
            # Limit number of jobs to be 1000
            jobs = get_api(request.user, request.jt).get_jobs(
                user=request.user, username=user, state=state, text=text,
                retired=retired, limit=1000)
        except Exception as ex:
            # The backend only exposes the failure reason through the message text.
            ex_message = str(ex)
            if 'Connection refused' in ex_message or 'standby RM' in ex_message:
                raise PopupException(_('Resource Manager cannot be contacted or might be down.'))
            elif 'Could not connect to' in ex_message:
                raise PopupException(_('Job Tracker cannot be contacted or might be down.'))
            else:
                raise PopupException(ex)

        json_jobs = {
            'jobs': [massage_job_for_json(job, request) for job in jobs],
        }
        return JsonResponse(json_jobs, encoder=JSONEncoderForHTML)
def single_task(request, job, taskid):
    """Render the detail page for one task of ``job``.

    The task is resolved through the job link obtained from the API, and
    both the task and that link are handed to the ``task.mako`` template.
    """
    api = get_api(request.user, request.jt)
    link = api.get_job_link(job.jobId)
    template_context = {'task': link.get_task(taskid), 'joblnk': link}
    return render("task.mako", request, template_context)
def single_task(request, job, taskid):
    """Render the detail page for one task of ``job``.

    The task is resolved through the job link obtained from the API, and
    both the task and that link are handed to the ``task.mako`` template.
    """
    api = get_api(request.user, request.jt)
    link = api.get_job_link(job.jobId)
    template_context = {"task": link.get_task(taskid), "joblnk": link}
    return render("task.mako", request, template_context)
def decorate(request, *args, **kwargs):
    """Resolve the job named by the ``job`` keyword argument.

    When the API reports ``ApplicationNotRunning`` the request is handed to
    ``job_not_assigned`` and its response is returned.
    """
    jobid = kwargs['job']

    try:
        job = get_api(request.user, request.jt).get_job(jobid=jobid)
    except ApplicationNotRunning:
        # reverse() seems broken, using request.path but beware, it discards GET and POST info
        return job_not_assigned(request, jobid, request.path)
def single_tracker(request, trackerid):
    """Look up a tracker by id, surfacing any failure as a PopupException.

    Raises:
        PopupException: when ``get_tracker`` fails for any reason; the
            original exception is attached as ``detail``.
    """
    jt = get_api(request.user, request.jt)

    try:
        tracker = jt.get_tracker(trackerid)
    except Exception as e:
        raise PopupException(_('The container disappears as soon as the job finishes.'), detail=e)
def kill_job(request, job):
    """Kill ``job`` and wait up to 15 seconds for it to leave the running states.

    Only POST requests are accepted, and only the job's owner or a superuser
    may kill it.  Once the job is no longer RUNNING/QUEUED the outcome is
    reported as a redirect to ``next``, a JSON status, or a MessageException,
    depending on the request parameters.

    Raises:
        Exception: for non-POST requests, or when the job still appears
            alive after 15 seconds.
        MessageException: on insufficient permission, or to report
            "Job Killed" when neither ``next`` nor ``format=json`` is given.
    """
    if request.method != "POST":
        raise Exception(
            _("kill_job may only be invoked with a POST (got a %(method)s).") % {'method': request.method})

    is_owner = job.user == request.user.username
    if not (is_owner or request.user.is_superuser):
        access_warn(request, _('Insufficient permission'))
        raise MessageException(
            _("Permission denied. User %(username)s cannot delete user %(user)s's job.")
            % {'username': request.user.username, 'user': job.user})

    job.kill()

    started_at = time.time()
    api = get_api(request.user, request.jt)

    while time.time() - started_at < 15:
        job = api.get_job(jobid=job.jobId)

        if job.status in ["RUNNING", "QUEUED"]:
            # Still alive: wait a little before polling again.
            time.sleep(1)
            continue

        if request.REQUEST.get("next"):
            return HttpResponseRedirect(request.REQUEST.get("next"))
        if request.REQUEST.get("format") == "json":
            return JsonResponse({'status': 0}, encoder=JSONEncoderForHTML)
        raise MessageException("Job Killed")

    raise Exception(_("Job did not appear as killed within 15 seconds."))
def jobs(request):
    """Serve the job list: JSON for ``format=json`` POSTs, the HTML page otherwise.

    Filters (user, state, text, retired and the time window) come from the
    POST data.  The JSON variant requests at most 1000 jobs; when the
    Resource Manager looks unreachable it answers with a status-1 JSON
    message instead of raising.

    Raises:
        PopupException: for any other failure while fetching the job list.
    """
    post = request.POST
    user = post.get('user', request.user.username)
    state = post.get('state')
    text = post.get('text')
    retired = post.get('retired')
    time_value = post.get('time_value', 7)
    time_unit = post.get('time_unit', 'days')

    if post.get('format') != 'json':
        page_context = {
            'request': request,
            'state_filter': state,
            'user_filter': user,
            'text_filter': text,
            'retired': retired,
            # Unfiltered means exactly: all states, blank user and blank text.
            'filtered': state != 'all' or user != '' or text != '',
            'is_yarn': cluster.is_yarn(),
            'hiveserver2_impersonation_enabled': hiveserver2_impersonation_enabled(),
        }
        return render('jobs.mako', request, page_context)

    try:
        # Limit number of jobs to be 1000
        job_list = get_api(request.user, request.jt).get_jobs(
            user=request.user, username=user, state=state, text=text,
            retired=retired, limit=1000, time_value=int(time_value),
            time_unit=time_unit)
    except Exception as ex:
        ex_message = str(ex)
        rm_markers = ('Connection refused', 'standby RM', 'Name or service not known')
        if any(marker in ex_message for marker in rm_markers):
            return JsonResponse({
                'status': 1,
                'message': _('Resource Manager cannot be contacted or might be down.'),
            })
        if 'Could not connect to' in ex_message:
            raise PopupException(_('Job Tracker cannot be contacted or might be down.'))
        raise PopupException(ex)

    payload = {'jobs': [massage_job_for_json(job, request) for job in job_list]}
    return JsonResponse(payload, encoder=JSONEncoderForHTML)
def container(request, node_manager_http_address, containerid):
    """Look up a container's tracker, surfacing any failure as a PopupException.

    Raises:
        PopupException: when ``get_tracker`` fails for any reason; the
            original exception is attached as ``detail``.
    """
    jt = get_api(request.user, request.jt)

    try:
        tracker = jt.get_tracker(node_manager_http_address, containerid)
    except Exception as e:
        # TODO: add a redirect of some kind
        raise PopupException(_('The container disappears as soon as the job finishes.'), detail=e)
def test_yarn_configurations(user):
    """Check that a job listing can be fetched through the job browser API.

    Returns a list of ``(component, message)`` tuples.  It is empty when the
    call succeeds or when the job browser API cannot be imported, and holds
    a single 'Resource Manager' entry when the call fails.
    """
    problems = []

    try:
        from jobbrowser.api import get_api  # Required for cluster HA testing
    except Exception:
        LOG.warn('Jobbrowser is disabled, skipping test_yarn_configurations')
    else:
        try:
            get_api(user, None).get_jobs(user, username=user.username, state='all', text='')
        except Exception as err:
            msg = 'Failed to contact an active Resource Manager: %s' % err
            LOG.exception(msg)
            problems.append(('Resource Manager', msg))

    return problems
def single_tracker(request, trackerid):
    """Look up a tracker by id, surfacing any failure as a PopupException.

    Raises:
        PopupException: when ``get_tracker`` fails for any reason; the
            original exception is attached as ``detail``.
    """
    jt = get_api(request.user, request.jt)

    try:
        tracker = jt.get_tracker(trackerid)
    except Exception as e:
        raise PopupException(
            _('The container disappears as soon as the job finishes.'), detail=e)
def get_job(request, job_id):
    """Fetch a job, falling back to an Application wrapper for accepted jobs.

    When the API raises ``ApplicationNotRunning`` and the reported state is
    'accepted', the job is wrapped in an ``Application`` bound to the
    user's resource manager API; otherwise the exception is re-raised.
    """
    try:
        job = get_api(request.user, request.jt).get_job(jobid=job_id)
    except ApplicationNotRunning as e:
        if e.job.get('state', '').lower() == 'accepted':
            rm_api = resource_manager_api.get_resource_manager(request.user)
            job = Application(e.job, rm_api)
        else:
            raise e  # Job has not yet been accepted by RM
def job_not_assigned(request, jobid, path):
    """Answer for a job that is not assigned yet.

    Plain requests get the ``job_not_assigned.mako`` page.  ``format=json``
    requests poll the job and receive a status: 0 when it can be fetched,
    1 when it is still not running, -1 with a message on any other error.
    """
    if request.GET.get('format') != 'json':
        return render('job_not_assigned.mako', request, {'jobid': jobid, 'path': path})

    payload = {'status': -1, 'message': ''}
    try:
        get_api(request.user, request.jt).get_job(jobid=jobid)
    except ApplicationNotRunning:
        payload['status'] = 1
    except Exception as err:
        payload['message'] = _('Error polling job %s: %s') % (jobid, err)
    else:
        payload['status'] = 0

    return JsonResponse(payload, encoder=JSONEncoderForHTML)
def decorate(request, *args, **kwargs):
    """Resolve the job named by the ``job`` keyword argument.

    Raises:
        PopupException: when the job cannot be fetched; the original
            exception is attached as ``detail``.
    """
    jobid = kwargs['job']

    try:
        job = get_api(request.user, request.jt).get_job(jobid=jobid)
    except Exception as e:
        raise PopupException(
            _('Could not find job %s. The job might not be running yet.') % jobid, detail=e)
def single_tracker(request, trackerid):
    """Render the page for a single tracker.

    Raises:
        PopupException: when the tracker lookup fails for any reason; the
            original exception is attached as ``detail``.
    """
    api = get_api(request.user, request.jt)

    try:
        found = api.get_tracker(trackerid)
    except Exception as err:
        raise PopupException(_('The tracker could not be contacted.'), detail=err)

    return render("tasktracker.mako", request, {'tracker': found})
def single_task_attempt_logs(request, job, taskid, attemptid):
    """Resolve the task attempt whose logs are requested.

    Raises:
        KeyError: when the attempt cannot be found on the task, or the
            REST call fails.
    """
    jt = get_api(request.user, request.jt)
    job_link = jt.get_job_link(job.jobId)
    task = job_link.get_task(taskid)

    try:
        attempt = task.get_attempt(attemptid)
    except (KeyError, RestException) as e:
        raise KeyError(_("Cannot find attempt '%(id)s' in task") % {'id': attemptid}, e)
def single_task_attempt(request, job, taskid, attemptid):
    """Resolve a single task attempt of ``job``.

    Raises:
        PopupException: when the attempt cannot be found on the task, or
            the REST call fails.
    """
    jt = get_api(request.user, request.jt)
    job_link = jt.get_job_link(job.jobId)
    task = job_link.get_task(taskid)

    try:
        attempt = task.get_attempt(attemptid)
    except (KeyError, RestException) as e:
        raise PopupException(_("Cannot find attempt '%(id)s' in task") % {'id': attemptid}, e)
def single_task_attempt_logs(request, job, taskid, attemptid, offset=LOG_OFFSET_BYTES):
    """Resolve the task attempt whose logs are requested.

    Raises:
        KeyError: when the attempt cannot be found on the task, or the
            REST call fails.
    """
    jt = get_api(request.user, request.jt)
    job_link = jt.get_job_link(job.jobId)
    task = job_link.get_task(taskid)

    try:
        attempt = task.get_attempt(attemptid)
    except (KeyError, RestException) as e:
        raise KeyError(_("Cannot find attempt '%(id)s' in task") % {'id': attemptid}, e)
def decorate(request, *args, **kwargs):
    """Resolve the job named by the ``job`` keyword argument.

    When the API raises ``ApplicationNotRunning`` for a job in the
    'accepted' state and the request path contains 'kill', the job is
    wrapped in an ``Application``.  Any other not-running job is handed to
    ``job_not_assigned`` and its response is returned.
    """
    jobid = kwargs['job']

    try:
        job = get_api(request.user, request.jt).get_job(jobid=jobid)
    except ApplicationNotRunning as e:
        if e.job.get('state', '').lower() == 'accepted' and 'kill' in request.path:
            rm_api = resource_manager_api.get_resource_manager(request.user)
            job = Application(e.job, rm_api)
        else:
            # reverse() seems broken, using request.path but beware, it discards GET and POST info
            return job_not_assigned(request, jobid, request.path)
def kill_job_by_jt(request):
    """Look up the job named by the POSTed ``job_id`` ahead of a kill.

    Only POST requests are accepted.  When the job cannot be fetched, a JSON
    body explaining that is returned instead of raising.

    Raises:
        Exception: when the request method is not POST.
    """
    if request.method != "POST":
        # Format the message before constructing the exception; applying
        # ``%`` to the exception instance itself raises a TypeError.
        raise Exception("kill_job may only be invoked with a POST (got a %(method)s)." % dict(method=request.method))

    job_id = request.POST['job_id']

    # check job permission
    try:
        api = get_api(request.user, request.jt)
        job = api.get_job(jobid=job_id)
    except Exception:
        return HttpResponse(json.dumps({"text": "Could not find job %s. The job might not be running yet." % job_id}))
def get_job(request, job_id):
    """Fetch a job, with a fallback for accepted jobs that are being killed.

    When the API raises ``ApplicationNotRunning`` for a job in the
    'accepted' state and the request path contains 'kill', the job is
    wrapped in an ``Application`` using a resource manager API taken from
    the pool.  Any other not-running job is handed to ``job_not_assigned``
    and its response is returned.
    """
    try:
        job = get_api(request.user, request.jt).get_job(jobid=job_id)
    except ApplicationNotRunning as e:
        if e.job.get('state', '').lower() == 'accepted' and 'kill' in request.path:
            rm_pool = resource_manager_api.get_resource_manager_pool()
            rm_api = rm_pool.get(request.user.username)
            try:
                job = Application(e.job, rm_api)
            finally:
                # Hand the client back to the pool even if wrapping fails.
                rm_pool.put(rm_api)
        else:
            # reverse() seems broken, using request.path but beware, it discards GET and POST info
            return job_not_assigned(request, job_id, request.path)
def jobs(request):
    """Render the job listing page, filtered by the GET query parameters.

    The ``user`` filter defaults to the requesting user's name; ``state``,
    ``text`` and ``retired`` are passed through as given.
    """
    params = request.GET
    user = params.get('user', request.user.username)
    state = params.get('state')
    text = params.get('text')
    retired = params.get('retired')

    api = get_api(request.user, request.jt)
    matching_jobs = api.get_jobs(user=request.user, username=user, state=state, text=text, retired=retired)

    # Unfiltered means exactly: all states, blank user and blank text.
    is_filtered = state != 'all' or user != '' or text != ''

    page_context = {
        'jobs': matching_jobs,
        'request': request,
        'state_filter': state,
        'user_filter': user,
        'text_filter': text,
        'retired': retired,
        'filtered': is_filtered,
    }
    return render('jobs.mako', request, page_context)
def single_task_attempt(request, job, taskid, attemptid):
    """Render the page for a single attempt of a task.

    Raises:
        PopupException: when the attempt cannot be found on the task, or
            the REST call fails.
    """
    api = get_api(request.user, request.jt)
    link = api.get_job_link(job.jobId)
    parent_task = link.get_task(taskid)

    try:
        found = parent_task.get_attempt(attemptid)
    except (KeyError, RestException) as err:
        message = _("Cannot find attempt '%(id)s' in task") % {'id': attemptid}
        # NOTE(review): the exception is passed positionally here, whereas
        # other PopupException call sites use ``detail=`` -- confirm intent.
        raise PopupException(message, err)

    page_context = {
        "attempt": found,
        "taskid": taskid,
        "joblnk": link,
        "task": parent_task,
    }
    return render("attempt.mako", request, page_context)
def get_job(request, job_id):
    """Return the job for ``job_id``.

    A job the API reports as not running is still returned, wrapped in an
    ``Application``, when its state is 'accepted'; otherwise the
    ``ApplicationNotRunning`` exception is re-raised.

    Raises:
        ApplicationNotRunning: when the job is not running and not accepted.
        PopupException: when the job has expired or cannot be found.
    """
    try:
        found = get_api(request.user, request.jt).get_job(jobid=job_id)
    except ApplicationNotRunning as e:
        if e.job.get('state', '').lower() != 'accepted':
            raise e  # Job has not yet been accepted by RM
        rm_api = resource_manager_api.get_resource_manager(request.user)
        found = Application(e.job, rm_api)
    except JobExpired:
        raise PopupException(
            _('Job %s has expired.') % job_id,
            detail=_('Cannot be found on the History Server.'))
    except Exception as e:
        msg = 'Could not find job %s.'
        LOG.exception(msg % job_id)
        raise PopupException(_(msg) % job_id, detail=e)

    return found
def kill_job_by_jt(request):
    """Look up the job named by the POSTed ``job_id`` ahead of a kill.

    Only POST requests are accepted.  When the job cannot be fetched, a JSON
    body explaining that is returned instead of raising.

    Raises:
        Exception: when the request method is not POST.
    """
    if request.method != "POST":
        # Format the message before constructing the exception; applying
        # ``%`` to the exception instance itself raises a TypeError.
        raise Exception(
            "kill_job may only be invoked with a POST (got a %(method)s)."
            % dict(method=request.method))

    job_id = request.POST['job_id']

    # check job permission
    try:
        api = get_api(request.user, request.jt)
        job = api.get_job(jobid=job_id)
    except Exception:
        return HttpResponse(
            json.dumps({
                "text": "Could not find job %s. The job might not be running yet." % job_id
            }))
def jobs(request):
    """Return the filtered job list as JSON for ``format=json`` GET requests.

    Filters (user, state, text, retired) are read from the query string.

    Raises:
        PopupException: when the Resource Manager or Job Tracker appears to
            be unreachable.  Any other failure is re-raised unchanged.
    """
    user = request.GET.get('user', request.user.username)
    state = request.GET.get('state')
    text = request.GET.get('text')
    retired = request.GET.get('retired')

    if request.GET.get('format') == 'json':
        try:
            jobs = get_api(request.user, request.jt).get_jobs(
                user=request.user, username=user, state=state, text=text, retired=retired)
        except Exception as ex:
            # The backend only exposes the failure reason through the message text.
            ex_message = str(ex)
            if 'Connection refused' in ex_message or 'standby RM' in ex_message:
                raise PopupException(_('Resource Manager cannot be contacted or might be down.'))
            elif 'Could not connect to' in ex_message:
                raise PopupException(_('Job Tracker cannot be contacted or might be down.'))
            else:
                raise ex

        json_jobs = [massage_job_for_json(job, request) for job in jobs]
        return HttpResponse(encode_json_for_js(json_jobs), mimetype="application/json")
def jobs(request):
    """Serve the job list: JSON for ``format=json`` requests, the HTML page otherwise.

    Filters (user, state, text, retired) are read from the query string; the
    ``user`` filter defaults to the requesting user's name.
    """
    params = request.GET
    user = params.get('user', request.user.username)
    state = params.get('state')
    text = params.get('text')
    retired = params.get('retired')

    if params.get('format') != 'json':
        page_context = {
            'request': request,
            'state_filter': state,
            'user_filter': user,
            'text_filter': text,
            'retired': retired,
            # Unfiltered means exactly: all states, blank user and blank text.
            'filtered': state != 'all' or user != '' or text != '',
        }
        return render('jobs.mako', request, page_context)

    api = get_api(request.user, request.jt)
    found = api.get_jobs(user=request.user, username=user, state=state, text=text, retired=retired)
    json_jobs = [massage_job_for_json(job, request) for job in found]
    return HttpResponse(encode_json_for_js(json_jobs), mimetype="application/json")
def tasks(request, job):
    """
    Render the task list of a job.

    Reached from /jobs/job/tasks?filterargs, where the supported options are:
      page=<n>            - Pagination; defaults to 1, non-positive values become 1.
      tasktype=<type>     - Comma-separated; each one of
                            hadoop.job_tracker.VALID_TASK_TYPES
                            ("map", "reduce", "job_cleanup", "job_setup")
      taskstate=<state>   - Comma-separated; each one of
                            hadoop.job_tracker.VALID_TASK_STATES
                            ("succeeded", "failed", "running", "pending", "killed")
      tasktext=<text>     - A string matching info on the task
    """
    query = request.GET
    ttypes = query.get("tasktype")
    tstates = query.get("taskstate")
    ttext = query.get("tasktext")

    pagenum = int(query.get("page", 1))
    if pagenum <= 0:
        pagenum = 1

    filters = {
        "task_types": set(ttypes.split(",")) if ttypes else None,
        "task_states": set(tstates.split(",")) if tstates else None,
        "task_text": ttext,
        "pagenum": pagenum,
    }

    api = get_api(request.user, request.jt)
    task_list = api.get_tasks(job.jobId, **filters)

    # Carry the active filters over into the links of the rendered page.
    kept_keys = ("tasktype", "taskstate", "tasktext")
    filter_params = copy_query_dict(request.GET, kept_keys).urlencode()

    page_context = {
        "request": request,
        "filter_params": filter_params,
        "job": job,
        "task_list": task_list,
        "tasktype": ttypes,
        "taskstate": tstates,
        "tasktext": ttext,
    }
    return render("tasks.mako", request, page_context)
def tasks(request, job):
    """
    Render one page of the task list of a job.

    Reached from /jobs/job/tasks?filterargs, where the supported options are:
      page=<n>            - Pagination; defaults to 1, non-positive values become 1.
      tasktype=<type>     - Comma-separated; each one of
                            hadoop.job_tracker.VALID_TASK_TYPES
                            ("map", "reduce", "job_cleanup", "job_setup")
      taskstate=<state>   - Comma-separated; each one of
                            hadoop.job_tracker.VALID_TASK_STATES
                            ("succeeded", "failed", "running", "pending", "killed")
      tasktext=<text>     - A string matching info on the task
    """
    query = request.GET
    ttypes = query.get('tasktype')
    tstates = query.get('taskstate')
    ttext = query.get('tasktext')

    pagenum = int(query.get('page', 1))
    if pagenum <= 0:
        pagenum = 1

    filters = {
        'task_types': set(ttypes.split(',')) if ttypes else None,
        'task_states': set(tstates.split(',')) if tstates else None,
        'task_text': ttext,
        'pagenum': pagenum,
    }

    api = get_api(request.user, request.jt)
    task_list = api.get_tasks(job.jobId, **filters)
    page = api.paginate_task(task_list, pagenum)

    # Carry the active filters over into the links of the rendered page.
    kept_keys = ('tasktype', 'taskstate', 'tasktext')
    filter_params = copy_query_dict(request.GET, kept_keys).urlencode()

    page_context = {
        'request': request,
        'filter_params': filter_params,
        'job': job,
        'page': page,
        'tasktype': ttypes,
        'taskstate': tstates,
        'tasktext': ttext,
    }
    return render("tasks.mako", request, page_context)
def job_attempt_logs_json(request, job, attempt_index=0, name="syslog", offset=LOG_OFFSET_BYTES):
    """For async log retrieval as Yarn servers are very slow.

    Works out which log link applies to a MapReduce application: the
    selected job attempt's link once the application has finished, or the
    application master container logs while it is running.

    Raises:
        KeyError: when an expected key is missing or the REST call fails.
    """
    log_link = None
    response = {"status": -1}

    try:
        jt = get_api(request.user, request.jt)
        app = jt.get_application(job.jobId)

        if app["applicationType"] == "MAPREDUCE":
            if app["finalStatus"] in ("SUCCEEDED", "FAILED", "KILLED"):
                attempt_index = int(attempt_index)
                attempt = job.job_attempts["jobAttempt"][attempt_index]
                log_link = attempt["logsLink"]
                # Reformat log link to use YARN RM, replace node addr with node ID addr
                log_link = log_link.replace(attempt["nodeHttpAddress"], attempt["nodeId"])
            elif app["state"] == "RUNNING":
                log_link = app["amContainerLogs"]
    except (KeyError, RestException) as e:
        raise KeyError(_("Cannot find job attempt '%(id)s'.") % {"id": job.jobId}, e)
def job_attempt_logs_json(request, job, attempt_index=0, name='syslog', offset=LOG_OFFSET_BYTES):
    """For async log retrieval as Yarn servers are very slow.

    Works out which log link applies to a MapReduce application: the
    selected job attempt's link once the application has finished, or the
    application master container logs while it is running.

    Raises:
        KeyError: when an expected key is missing or the REST call fails.
    """
    log_link = None
    response = {'status': -1}

    try:
        jt = get_api(request.user, request.jt)
        app = jt.get_application(job.jobId)

        if app['applicationType'] == 'MAPREDUCE':
            if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
                attempt_index = int(attempt_index)
                attempt = job.job_attempts['jobAttempt'][attempt_index]
                log_link = attempt['logsLink']
                # Reformat log link to use YARN RM, replace node addr with node ID addr
                log_link = log_link.replace(attempt['nodeHttpAddress'], attempt['nodeId'])
            elif app['state'] == 'RUNNING':
                log_link = app['amContainerLogs']
    except (KeyError, RestException) as e:
        raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)
def jobs(request):
    """Return the filtered job list as JSON for ``format=json`` GET requests.

    Filters (user, state, text, retired) are read from the query string, and
    at most 10,000 jobs are requested.

    Raises:
        PopupException: when the Resource Manager or Job Tracker appears to
            be unreachable.  Any other failure is re-raised unchanged.
    """
    user = request.GET.get("user", request.user.username)
    state = request.GET.get("state")
    text = request.GET.get("text")
    retired = request.GET.get("retired")

    if request.GET.get("format") == "json":
        try:
            # Limit number of jobs to be 10,000
            jobs = get_api(request.user, request.jt).get_jobs(
                user=request.user, username=user, state=state, text=text, retired=retired, limit=10000
            )
        except Exception as ex:
            # The backend only exposes the failure reason through the message text.
            ex_message = str(ex)
            if "Connection refused" in ex_message or "standby RM" in ex_message:
                raise PopupException(_("Resource Manager cannot be contacted or might be down."))
            elif "Could not connect to" in ex_message:
                raise PopupException(_("Job Tracker cannot be contacted or might be down."))
            else:
                raise ex

        json_jobs = {"jobs": [massage_job_for_json(job, request) for job in jobs]}
        return JsonResponse(json_jobs, encoder=JSONEncoderForHTML)
def jobs(request):
    """Render the job listing page, filtered by the GET query parameters.

    The ``user`` filter defaults to the requesting user's name; ``state``,
    ``text`` and ``retired`` are passed through as given.
    """
    params = request.GET
    user = params.get('user', request.user.username)
    state = params.get('state')
    text = params.get('text')
    retired = params.get('retired')

    api = get_api(request.user, request.jt)
    matching_jobs = api.get_jobs(user=request.user, username=user, state=state, text=text, retired=retired)

    # Unfiltered means exactly: all states, blank user and blank text.
    is_filtered = state != 'all' or user != '' or text != ''

    page_context = {
        'jobs': matching_jobs,
        'request': request,
        'state_filter': state,
        'user_filter': user,
        'text_filter': text,
        'retired': retired,
        'filtered': is_filtered,
    }
    return render('jobs.mako', request, page_context)
@access_log_level(logging.WARN)
@check_job_permission
def kill_job(request, job):
    """Kill ``job`` and poll for up to 15 seconds until it stops running.

    Only POST requests are accepted.  Once the job is no longer
    RUNNING/QUEUED the outcome is reported as a redirect to ``next``, a JSON
    status, or a MessageException, depending on the request parameters.

    Raises:
        Exception: for non-POST requests.
        PopupException: when the kill call itself fails.
        MessageException: to report "Job Killed" when neither ``next`` nor
            ``format=json`` is given.
    """
    if request.method != "POST":
        raise Exception(
            _("kill_job may only be invoked with a POST (got a %(method)s).") % {'method': request.method})

    try:
        job.kill()
    except Exception as e:
        LOG.exception('Killing job')
        raise PopupException(e)

    cur_time = time.time()
    api = get_api(request.user, request.jt)

    while time.time() - cur_time < 15:
        try:
            job = api.get_job(jobid=job.jobId)
        except Exception as e:
            # A failed poll is not fatal: keep the previous job object and retry.
            LOG.warn('Failed to get job with ID %s: %s' % (job.jobId, e))
        else:
            if job.status not in ["RUNNING", "QUEUED"]:
                if request.REQUEST.get("next"):
                    return HttpResponseRedirect(request.REQUEST.get("next"))
                elif request.REQUEST.get("format") == "json":
                    return JsonResponse({'status': 0}, encoder=JSONEncoderForHTML)
                else:
                    raise MessageException("Job Killed")
        # Pause between polls so the loop does not hammer the API.
        time.sleep(1)
res.extend(test_yarn_configurations(user)) for name in YARN_CLUSTERS.keys(): cluster = YARN_CLUSTERS[name] if cluster.SUBMIT_TO.get(): submit_to.append('yarn_clusters.' + name) if not submit_to: res.append(("hadoop", "Please designate one of the MapReduce or " "Yarn clusters with `submit_to=true' in order to run jobs.")) return res def test_yarn_configurations(user): result = [] try: from jobbrowser.api import get_api # Required for cluster HA testing except Exception, e: LOG.warn('Jobbrowser is disabled, skipping test_yarn_configurations') return result try: get_api(user, None).get_jobs(user, username=user.username, state='all', text='') except Exception, e: msg = 'Failed to contact an active Resource Manager: %s' % e LOG.exception(msg) result.append(('Resource Manager', msg)) return result
def decorate(request, *args, **kwargs):
    """Resolve the job named by the ``job`` keyword argument.

    Raises:
        PopupException: when the job cannot be fetched; the original
            exception is attached as ``detail``.
    """
    jobid = kwargs['job']

    try:
        job = get_api(request.user, request.jt).get_job(jobid=jobid)
    except Exception as e:
        raise PopupException(_('Could not find job %s. The job might not be running yet.') % jobid, detail=e)
def single_task_attempt_logs(request, job, taskid, attemptid, offset=LOG_OFFSET_BYTES):
    """Collect the diagnostic and task logs of one task attempt.

    The result depends on the ``format`` query parameter: ``python`` returns
    the raw context dict, ``json`` returns a JsonResponse with the linked
    logs, and anything else renders ``attempt_logs.mako``.

    Raises:
        KeyError: when the attempt cannot be found on the task, or the
            REST call fails.
    """
    api = get_api(request.user, request.jt)
    job_link = api.get_job_link(job.jobId)
    task = job_link.get_task(taskid)

    try:
        attempt = task.get_attempt(attemptid)
    except (KeyError, RestException) as err:
        raise KeyError(
            _("Cannot find attempt '%(id)s' in task") % {'id': attemptid}, err)

    first_log_tab = 0

    try:
        # The diagnostic log always comes first.
        if hasattr(task, 'job') and hasattr(task.job, 'diagnostics'):
            diagnostic_log = task.job.diagnostics
        elif job_link.is_mr2:
            diagnostic_log = attempt.diagnostics
        else:
            diagnostic_log = ", ".join(task.diagnosticMap[attempt.attemptId])
        logs = [diagnostic_log]

        # Followed by the remaining log sections.
        for section in attempt.get_task_log(offset=offset):
            logs.append(section.strip())

        # Open on the first tab that actually has content, if there is one.
        first_log_tab = next(
            (index for index, entry in enumerate(logs) if entry), first_log_tab)
    except urllib.error.URLError:
        logs = [_("Failed to retrieve log. TaskTracker not ready.")] * 4

    context = {
        "attempt": attempt,
        "taskid": taskid,
        "joblnk": job_link,
        "task": task,
        "logs": logs,
        "logs_list": attempt.get_log_list(),
        "first_log_tab": first_log_tab,
    }

    if request.GET.get('format') == 'python':
        return context

    context['logs'] = [LinkJobLogs._make_links(entry) for entry in logs]

    if request.GET.get('format') != 'json':
        return render("attempt_logs.mako", request, context)

    response = {
        "logs": context['logs'],
        "logsList": context['logs_list'],
        "isRunning": job.status.lower() in ('running', 'pending', 'prep'),
    }
    return JsonResponse(response)
def single_tracker(request, trackerid):
    """Render the page for the tracker identified by ``trackerid``."""
    api = get_api(request.user, request.jt)
    page_context = {'tracker': api.get_tracker(trackerid)}
    return render("tasktracker.mako", request, page_context)
def single_tracker(request, trackerid):
    """Render the page for the tracker identified by ``trackerid``."""
    api = get_api(request.user, request.jt)
    page_context = {'tracker': api.get_tracker(trackerid)}
    return render("tasktracker.mako", request, page_context)
def job_attempt_logs_json(request, job, attempt_index=0, name='syslog', offset=LOG_OFFSET_BYTES, is_embeddable=False):
    """For async log retrieval as Yarn servers are very slow.

    First works out which log link applies to the job's application (the
    selected job attempt's link for a finished MapReduce application, the
    application master container logs otherwise), then fetches the ``name``
    log from that link and extracts its text from the returned HTML.

    Returns:
        A JsonResponse whose ``status`` is 0 on success (with ``log``), or
        -1 when no log could be retrieved (with ``log`` and ``debug``
        describing the failure when a fetch was attempted).

    Raises:
        KeyError: when an expected key is missing or the REST call fails
            while resolving the log link.
        Exception: for any other failure while resolving the log link.
    """
    log_link = None
    response = {'status': -1}

    try:
        jt = get_api(request.user, request.jt)
        app = jt.get_application(job.jobId)

        if app['applicationType'] == 'MAPREDUCE':
            if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
                attempt_index = int(attempt_index)
                if not job.job_attempts['jobAttempt']:
                    response = {'status': 0, 'log': _('Job has no tasks')}
                else:
                    attempt = job.job_attempts['jobAttempt'][attempt_index]
                    log_link = attempt['logsLink']
                    # Reformat log link to use YARN RM, replace node addr with node ID addr
                    log_link = log_link.replace(attempt['nodeHttpAddress'], attempt['nodeId'])
            elif app['state'] == 'RUNNING':
                log_link = app['amContainerLogs']
        elif app.get('amContainerLogs'):
            log_link = app.get('amContainerLogs')
    except (KeyError, RestException) as e:
        raise KeyError(
            _("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)
    except Exception as e:
        raise Exception(
            _("Failed to get application for job %s: %s") % (job.jobId, e))

    if log_link:
        link = '/%s/' % name
        params = {'doAs': request.user.username}
        if offset != 0:
            params['start'] = offset

        root = Resource(get_log_client(log_link), urllib.parse.urlsplit(log_link)[2], urlencode=False)
        api_resp = None

        try:
            api_resp = root.get(link, params=params)
            log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath(
                '/html/body/table/tbody/tr/td[2]')[0].text_content()

            response['status'] = 0
            response['log'] = LinkJobLogs._make_hdfs_links(log, is_embeddable)
        except Exception as e:
            # Interpolate after the translation lookup so the message
            # catalog is queried with the untouched template string.
            response['log'] = _('Failed to retrieve log: %s') % e
            try:
                debug_info = '\nLog Link: %s' % log_link
                if api_resp:
                    # NOTE(review): this formats the ``response`` dict, not
                    # ``api_resp`` -- confirm which one was intended.
                    debug_info += '\nHTML Response: %s' % response
                response['debug'] = debug_info
                LOG.error(debug_info)
            except Exception:
                # Best effort only: debug details must never mask the real error.
                LOG.exception('failed to create debug info')

    return JsonResponse(response)
submit_to.append('yarn_clusters.' + name) if not submit_to: res.append( ("hadoop", "Please designate one of the MapReduce or " "Yarn clusters with `submit_to=true' in order to run jobs.")) return res def test_yarn_configurations(user): result = [] try: from jobbrowser.api import get_api # Required for cluster HA testing except Exception, e: LOG.warn('Jobbrowser is disabled, skipping test_yarn_configurations') return result try: get_api(user, None).get_jobs(user, username=user.username, state='all', text='') except Exception, e: msg = 'Failed to contact an active Resource Manager: %s' % e LOG.exception(msg) result.append(('Resource Manager', msg)) return result
def kill_job(request, job):
    """Kill ``job`` and poll for up to 15 seconds until it stops running.

    Only POST requests are accepted, and only the job's owner or a superuser
    may kill it.  Once the job is no longer RUNNING/QUEUED the outcome is
    reported as a redirect to ``next``, a JSON status, or a MessageException,
    depending on the request parameters.

    Raises:
        Exception: for non-POST requests.
        MessageException: on insufficient permission, or to report
            "Job Killed" when neither ``next`` nor ``format=json`` is given.
        PopupException: when the kill call itself fails.
    """
    if request.method != "POST":
        raise Exception(_("kill_job may only be invoked with a POST (got a %(method)s).") % {'method': request.method})

    if job.user != request.user.username and not request.user.is_superuser:
        access_warn(request, _('Insufficient permission'))
        raise MessageException(
            _("Permission denied. User %(username)s cannot delete user %(user)s's job.")
            % {'username': request.user.username, 'user': job.user})

    try:
        job.kill()
    except Exception as e:
        LOG.exception('Killing job')
        raise PopupException(e)

    cur_time = time.time()
    api = get_api(request.user, request.jt)

    while time.time() - cur_time < 15:
        try:
            job = api.get_job(jobid=job.jobId)
        except Exception as e:
            # A failed poll is not fatal: keep the previous job object and retry.
            LOG.warn('Failed to get job with ID %s: %s' % (job.jobId, e))
        else:
            if job.status not in ["RUNNING", "QUEUED"]:
                if request.REQUEST.get("next"):
                    return HttpResponseRedirect(request.REQUEST.get("next"))
                elif request.REQUEST.get("format") == "json":
                    return JsonResponse({'status': 0}, encoder=JSONEncoderForHTML)
                else:
                    raise MessageException("Job Killed")
        time.sleep(1)