def cancel(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']

    job = check_job_access_permission(self, job_id)
    check_job_edition_permission(job, self.user)

    api.get(self.fs, self.jt, self.user).stop(job_id)

    return {'status': 0}
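# Illustrative only: the exact `snippet` payload is not shown in this section. Judging by the
# fields cancel(), get_log() and check_status() read, the job handle is assumed to look roughly
# like the sketch below; the Oozie workflow id is a made-up placeholder.
example_snippet = {
    'result': {
        'handle': {
            'id': '0000001-000000000000000-oozie-oozi-W'  # hypothetical Oozie workflow id returned by submit()
        }
    }
}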
def stop(request):
    if request.method != 'POST':
        raise PopupException(_('POST request required.'))

    pig_script = PigScript.objects.get(id=request.POST.get('id'))
    job_id = pig_script.dict['job_id']

    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    try:
        api.get(request.fs, request.jt, request.user).stop(job_id)
    except RestException as e:
        raise PopupException(_('Error stopping Pig script: %s') % e.message)
def run(request): if request.method != "POST": raise PopupException(_("POST request required.")) attrs = { "id": request.POST.get("id"), "name": request.POST.get("name"), "script": request.POST.get("script"), "user": request.user, "parameters": json.loads(request.POST.get("parameters")), "resources": json.loads(request.POST.get("resources")), "hadoopProperties": json.loads(request.POST.get("hadoopProperties")), "is_design": False, } pig_script = create_or_update_script(**attrs) params = request.POST.get("submissionVariables") oozie_id = api.get(request.fs, request.jt, request.user).submit(pig_script, params) pig_script.update_from_dict({"job_id": oozie_id}) pig_script.save() response = {"id": pig_script.id, "watchUrl": reverse("pig:watch", kwargs={"job_id": oozie_id}) + "?format=python"} return HttpResponse(json.dumps(response), content_type="text/plain")
def test_check_hcatalogs_sharelib(self):
    api = get(None, None, self.user)
    pig_script = self.create_script()

    # Regular
    wf = api._create_workflow(pig_script, '[]')
    assert_false({
        'name': u'oozie.action.sharelib.for.pig',
        'value': u'pig,hcatalog'
    } in wf.find_all_parameters(), wf.find_all_parameters())

    # With HCat
    pig_script.update_from_dict({
        'script': """
a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
dump a;
"""
    })
    pig_script.save()

    wf = api._create_workflow(pig_script, '[]')
    assert_true({
        'name': u'oozie.action.sharelib.for.pig',
        'value': u'pig,hcatalog'
    } in wf.find_all_parameters(), wf.find_all_parameters())
def test_check_automated_hcatalogs_credentials(self):
    reset = SECURITY_ENABLED.set_for_testing(True)

    try:
        api = get(None, None, self.user)
        pig_script = self.create_script()
        pig_script.update_from_dict({
            'script': """
a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
dump a;

STORE raw_data INTO 'students' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage(
    'info:first_name info:last_name info:age info:gpa info:part');

raw_data = LOAD 'students' USING PigStorage( ' ' ) AS (
    id: chararray,
    first_name: chararray,
    last_name: chararray,
    age: int,
    gpa: float,
    part: int );
"""})
        pig_script.save()

        wf = api._create_workflow(pig_script, '[]')
        start_link = wf.start.get_link()
        pig_action = start_link.child
        assert_equal([{u'name': u'hcat', u'value': True}, {u'name': u'hbase', u'value': True}], pig_action.credentials)
    finally:
        reset()
def test_check_hcatalogs_sharelib(self): api = get(None, None, self.user) pig_script = self.create_script() # Regular wf = api._create_workflow(pig_script, "[]") assert_false( {"name": u"oozie.action.sharelib.for.pig", "value": u"pig,hcatalog"} in wf.find_all_parameters(), wf.find_all_parameters(), ) # With HCat pig_script.update_from_dict( { "script": """ a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader(); dump a; """ } ) pig_script.save() wf = api._create_workflow(pig_script, "[]") assert_true( {"name": u"oozie.action.sharelib.for.pig", "value": u"pig,hcatalog"} in wf.find_all_parameters(), wf.find_all_parameters(), ) start_link = wf.start.get_link() pig_action = start_link.child assert_equal([], pig_action.credentials)
def run(request):
    if request.method != 'POST':
        raise PopupException(_('POST request required.'))

    attrs = {
        'id': request.POST.get('id'),
        'name': request.POST.get('name'),
        'script': request.POST.get('script'),
        'user': request.user,
        'parameters': json.loads(request.POST.get('parameters')),
        'resources': json.loads(request.POST.get('resources')),
        'hadoopProperties': json.loads(request.POST.get('hadoopProperties')),
        'is_design': False
    }

    pig_script = create_or_update_script(**attrs)

    params = request.POST.get('submissionVariables')
    oozie_id = api.get(request.fs, request.jt, request.user).submit(pig_script, params)

    pig_script.update_from_dict({'job_id': oozie_id})
    pig_script.save()

    response = {
        'id': pig_script.id,
        'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
def dashboard(request):
    pig_api = api.get(request.fs, request.user)

    jobs = pig_api.get_jobs()
    hue_jobs = PigScript.objects.filter(owner=request.user)
    massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

    return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
def dashboard(request):
    pig_api = api.get(request.fs, request.jt, request.user)

    jobs = pig_api.get_jobs()
    hue_jobs = Document.objects.available(PigScript, request.user)
    massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

    return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
def get_log(self, notebook, snippet, startFrom=0, size=None):
    job_id = snippet['result']['handle']['id']

    request = MockRequest(self.user, self.fs, self.jt)
    oozie_workflow = check_job_access_permission(MockRequest(self.user, self.fs, self.jt), job_id)

    logs, workflow_actions, is_really_done = api.get(self.jt, self.jt, self.user).get_log(request, oozie_workflow)

    return logs
def dashboard(request):
    pig_api = api.get(request.fs, request.jt, request.user)

    jobs = pig_api.get_jobs()
    hue_jobs = Document.objects.available(PigScript, request.user, with_history=True)
    massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

    return JsonResponse(massaged_jobs, safe=False)
def _get_output(self, oozie_workflow):
    q = QueryDict(self.request.GET, mutable=True)
    q['format'] = 'python'  # Hack for triggering the good section in single_task_attempt_logs
    self.request.GET = q

    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(self.request, oozie_workflow)

    return logs, workflow_actions, is_really_done
def _get_output(self, oozie_workflow):
    q = QueryDict(self.request.GET, mutable=True)
    q['format'] = 'python'  # Hack for triggering the good section in single_task_attempt_logs
    self.request.GET = q

    logs, workflow_actions, is_really_done = api.get(
        self.fs, self.jt, self.user).get_log(self.request, oozie_workflow)

    return logs, workflow_actions, is_really_done
def _get_log_output(self, oozie_workflow):
    log_output = ''

    q = QueryDict(self.request.GET, mutable=True)
    q['format'] = 'python'  # Hack for triggering the good section in single_task_attempt_logs
    self.request.GET = q

    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(
        self.request, oozie_workflow, make_links=False)

    if len(logs) > 0:
        log_output = logs.values()[0]
        if log_output.startswith('Unable to locate'):
            LOG.debug('Failed to get job attempt logs, possibly due to YARN archiving job to JHS. Will sleep and try again.')
            time.sleep(5.0)
            logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(
                self.request, oozie_workflow, make_links=False)
            if len(logs) > 0:
                log_output = logs.values()[0]

    return log_output, workflow_actions, is_really_done
def run(request):
    pig_script = create_or_update_script(
        request.POST.get('id'),
        request.POST.get('name'),
        request.POST.get('script'),
        request.user,
        is_design=False)

    params = {}
    oozie_id = api.get(request.fs, request.user).submit(pig_script, params)

    pig_script.update_from_dict({'job_id': oozie_id})
    pig_script.save()

    response = {
        'id': pig_script.id,
        'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
def test_check_hcatalogs_sharelib(self):
    api = get(None, None, self.user)
    pig_script = self.create_script()

    # Regular
    wf = api._create_workflow(pig_script, '[]')
    assert_false({'name': u'oozie.action.sharelib.for.pig', 'value': u'pig,hcatalog'} in wf.find_all_parameters(),
                 wf.find_all_parameters())

    # With HCat
    pig_script.update_from_dict({
        'script': """
a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
dump a;
"""})
    pig_script.save()

    wf = api._create_workflow(pig_script, '[]')
    assert_true({'name': u'oozie.action.sharelib.for.pig', 'value': u'pig,hcatalog'} in wf.find_all_parameters(),
                wf.find_all_parameters())
def execute(self, notebook, snippet):
    attrs = {
        'script': snippet['statement'],
        'name': snippet['properties'].get('name', 'Pig Snippet'),
        'parameters': snippet['properties'].get('parameters'),
        'resources': snippet['properties'].get('resources'),
        'hadoopProperties': snippet['properties'].get('hadoopProperties')
    }

    pig_script = PigScript2(attrs)

    params = json.dumps([])
    oozie_id = api.get(self.fs, self.jt, self.user).submit(pig_script, params)

    return {
        'id': oozie_id,
        'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
    }
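# Illustrative only: a minimal `snippet` dict that execute() above could accept, inferred from
# the keys it reads ('statement' and 'properties'); the statement and property values below are
# made-up placeholders, not taken from the source.
example_snippet = {
    'statement': "a = LOAD 'sample_07'; DUMP a;",  # hypothetical Pig statement
    'properties': {
        'name': 'Pig Snippet',
        'parameters': [],
        'resources': [],
        'hadoopProperties': []
    }
}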
def check_status(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']

    request = MockRequest(self.user, self.fs, self.jt)
    oozie_workflow = check_job_access_permission(request, job_id)

    logs, workflow_actions, is_really_done = api.get(self.jt, self.jt, self.user).get_log(request, oozie_workflow)

    if is_really_done and not oozie_workflow.is_running():
        if oozie_workflow.status in ('KILLED', 'FAILED'):
            raise QueryError(_('The script failed to run and was stopped'))
        status = 'available'
    elif oozie_workflow.is_running():
        status = 'running'
    else:
        status = 'failed'

    return {
        'status': status
    }
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(
        request.fs, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        'job_id': oozie_workflow.id,
        'status': oozie_workflow.status,
        'progress': oozie_workflow.get_progress(),
        'isRunning': oozie_workflow.is_running(),
        'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={
            'job_id': oozie_workflow.id,
            'action': 'kill'
        }),
        'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={
            'job_id': oozie_workflow.id,
            'app_path': urllib.parse.quote(oozie_workflow.appPath.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
        }),
        'actions': workflow_actions
    }

    response = {
        'workflow': workflow,
        'logs': logs,
        'isReallyDone': is_really_done,
        'output': hdfs_link(output)
    }

    return JsonResponse(response, content_type="text/plain")
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions = api.get(request, job_id).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        "job_id": oozie_workflow.id,
        "status": oozie_workflow.status,
        "progress": oozie_workflow.get_progress(),
        "isRunning": oozie_workflow.is_running(),
        "killUrl": reverse("oozie:manage_oozie_jobs", kwargs={"job_id": oozie_workflow.id, "action": "kill"}),
        "rerunUrl": reverse(
            "oozie:rerun_oozie_job", kwargs={"job_id": oozie_workflow.id, "app_path": oozie_workflow.appPath}
        ),
        "actions": workflow_actions,
    }

    response = {"workflow": workflow, "logs": logs, "output": hdfs_link(output)}

    return HttpResponse(json.dumps(response), content_type="text/plain")
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(
        request.jt, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        'job_id': oozie_workflow.id,
        'status': oozie_workflow.status,
        'progress': oozie_workflow.get_progress(),
        'isRunning': oozie_workflow.is_running(),
        'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={
            'job_id': oozie_workflow.id,
            'action': 'kill'
        }),
        'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={
            'job_id': oozie_workflow.id,
            'app_path': oozie_workflow.appPath
        }),
        'actions': workflow_actions
    }

    response = {
        'workflow': workflow,
        'logs': logs,
        'isReallyDone': is_really_done,
        'output': hdfs_link(output)
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions = api.get(request.jt, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        'job_id': oozie_workflow.id,
        'status': oozie_workflow.status,
        'progress': oozie_workflow.get_progress(),
        'isRunning': oozie_workflow.is_running(),
        'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': oozie_workflow.id, 'action': 'kill'}),
        'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={'job_id': oozie_workflow.id, 'app_path': oozie_workflow.appPath}),
        'actions': workflow_actions
    }

    response = {
        'workflow': workflow,
        'logs': logs,
        'output': hdfs_link(output)
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
def parse_oozie_logs(self):
    api = get(None, None, self.user)

    assert_equal(
        '''Run pig script using PigRunner.run() for Pig version 0.8+
Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
Run pig script using PigRunner.run() for Pig version 0.8+
2013-10-09 17:30:39,709 [main] INFO org.apache.pig.Main - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
2013-10-09 17:30:39,709 [main] INFO org.apache.pig.Main - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
2013-10-09 17:30:39,710 [main] INFO org.apache.pig.Main - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
2013-10-09 17:30:39,710 [main] INFO org.apache.pig.Main - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
2013-10-09 17:30:39,739 [main] WARN org.apache.hadoop.conf.Configuration - dfs.df.interval is deprecated. Instead, use fs.df.interval
2013-10-09 17:30:39,739 [main] WARN org.apache.hadoop.conf.Configuration - mapred.task.tracker.http.address is deprecated. Instead, use mapreduce.tasktracker.http.address
2013-10-09 17:30:39,833 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to map-reduce job tracker at: localhost:8032
hdfs://localhost:8020/user/romain/.Trash <dir>
hdfs://localhost:8020/user/romain/examples <dir>
hdfs://localhost:8020/user/romain/tweets <dir>
hdfs://localhost:8020/user/romain/wordcount.jar<r 1> 3165
hdfs://localhost:8020/user/romain/words <dir>
hdfs://localhost:8020/user/romain/yelp <dir>''',
        api._match_logs({'logs': [None, OOZIE_LOGS]}))