Example #1
  def cancel(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']

    job = check_job_access_permission(self, job_id)
    check_job_edition_permission(job, self.user)

    api.get(self.fs, self.jt, self.user).stop(job_id)

    return {'status': 0}
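A minimal sketch of the input this method expects, inferred from the code above (the Oozie workflow id is a made-up value):

  # cancel() only reads snippet['result']['handle']['id']; notebook is unused.
  notebook = {}
  snippet = {'result': {'handle': {'id': '0000001-140101000000000-oozie-oozi-W'}}}
  # connector.cancel(notebook, snippet)  # -> {'status': 0}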
Example #2
def stop(request):
  if request.method != 'POST':
    raise PopupException(_('POST request required.'))

  pig_script = PigScript.objects.get(id=request.POST.get('id'))
  job_id = pig_script.dict['job_id']

  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  try:
    api.get(request.fs, request.jt, request.user).stop(job_id)
  except RestException as e:
    raise PopupException(_("Error stopping Pig script: %s") % e.message)
Example #3
def stop(request):
    if request.method != 'POST':
        raise PopupException(_('POST request required.'))

    pig_script = PigScript.objects.get(id=request.POST.get('id'))
    job_id = pig_script.dict['job_id']

    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    try:
        api.get(request.fs, request.jt, request.user).stop(job_id)
    except RestException as e:
        raise PopupException(_("Error stopping Pig script: %s") % e.message)
Example #4
def run(request):
    if request.method != "POST":
        raise PopupException(_("POST request required."))

    attrs = {
        "id": request.POST.get("id"),
        "name": request.POST.get("name"),
        "script": request.POST.get("script"),
        "user": request.user,
        "parameters": json.loads(request.POST.get("parameters")),
        "resources": json.loads(request.POST.get("resources")),
        "hadoopProperties": json.loads(request.POST.get("hadoopProperties")),
        "is_design": False,
    }

    pig_script = create_or_update_script(**attrs)

    params = request.POST.get("submissionVariables")
    oozie_id = api.get(request.fs, request.jt, request.user).submit(pig_script, params)

    pig_script.update_from_dict({"job_id": oozie_id})
    pig_script.save()

    response = {"id": pig_script.id, "watchUrl": reverse("pig:watch", kwargs={"job_id": oozie_id}) + "?format=python"}

    return HttpResponse(json.dumps(response), content_type="text/plain")
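A hedged sketch of how a client could exercise this view with Django's test client; the URL path is an assumption, the field names come from the view itself, and authentication is omitted:

  import json
  from django.test import Client

  client = Client()
  response = client.post('/pig/run', {  # assumed route for the run() view
      'id': '',  # assumed to mean "create a new script"
      'name': 'sample',
      'script': "a = LOAD 'sample_07'; dump a;",
      'parameters': json.dumps([]),
      'resources': json.dumps([]),
      'hadoopProperties': json.dumps([]),
      'submissionVariables': json.dumps([]),
  })
  # response body: {"id": ..., "watchUrl": "...?format=python"}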
Example #5
    def test_check_hcatalogs_sharelib(self):
        api = get(None, None, self.user)
        pig_script = self.create_script()

        # Regular
        wf = api._create_workflow(pig_script, '[]')
        assert_false({
            'name': u'oozie.action.sharelib.for.pig',
            'value': u'pig,hcatalog'
        } in wf.find_all_parameters(), wf.find_all_parameters())

        # With HCat
        pig_script.update_from_dict({
            'script':
            """
           a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
           dump a;
    """
        })
        pig_script.save()

        wf = api._create_workflow(pig_script, '[]')
        assert_true({
            'name': u'oozie.action.sharelib.for.pig',
            'value': u'pig,hcatalog'
        } in wf.find_all_parameters(), wf.find_all_parameters())
Example #6
File: tests.py Project: lorelib/hue
  def test_check_automated_hcatalogs_credentials(self):
    reset = SECURITY_ENABLED.set_for_testing(True)

    try:
      api = get(None, None, self.user)
      pig_script = self.create_script()
      pig_script.update_from_dict({
          'script':"""
            a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
            dump a;

            STORE raw_data INTO 'students' USING
            org.apache.pig.backend.hadoop.hbase.HBaseStorage
            org.apache.pig.backend.hadoop.hbase.HBaseStorage (
            'info:first_name info:last_name info:age info:gpa info:part');
            raw_data = LOAD 'students' USING PigStorage( ' ' ) AS (
            id: chararray,
            first_name: chararray,
            last_name: chararray,
            age: int,
            gpa: float,
            part: int );
      """})
      pig_script.save()

      wf = api._create_workflow(pig_script, '[]')
      start_link = wf.start.get_link()
      pig_action = start_link.child
      assert_equal([{u'name': u'hcat', u'value': True}, {u'name': u'hbase', u'value': True}], pig_action.credentials)
    finally:
      reset()
Example #7
File: tests.py Project: shanyou/hue
    def test_check_hcatalogs_sharelib(self):
        api = get(None, None, self.user)
        pig_script = self.create_script()

        # Regular
        wf = api._create_workflow(pig_script, "[]")
        assert_false(
            {"name": u"oozie.action.sharelib.for.pig", "value": u"pig,hcatalog"} in wf.find_all_parameters(),
            wf.find_all_parameters(),
        )

        # With HCat
        pig_script.update_from_dict(
            {
                "script": """
           a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
           dump a;
    """
            }
        )
        pig_script.save()

        wf = api._create_workflow(pig_script, "[]")
        assert_true(
            {"name": u"oozie.action.sharelib.for.pig", "value": u"pig,hcatalog"} in wf.find_all_parameters(),
            wf.find_all_parameters(),
        )

        start_link = wf.start.get_link()
        pig_action = start_link.child
        assert_equal([], pig_action.credentials)
Example #8
def run(request):
    if request.method != 'POST':
        raise PopupException(_('POST request required.'))

    attrs = {
        'id': request.POST.get('id'),
        'name': request.POST.get('name'),
        'script': request.POST.get('script'),
        'user': request.user,
        'parameters': json.loads(request.POST.get('parameters')),
        'resources': json.loads(request.POST.get('resources')),
        'hadoopProperties': json.loads(request.POST.get('hadoopProperties')),
        'is_design': False
    }

    pig_script = create_or_update_script(**attrs)

    params = request.POST.get('submissionVariables')
    oozie_id = api.get(request.fs, request.jt,
                       request.user).submit(pig_script, params)

    pig_script.update_from_dict({'job_id': oozie_id})
    pig_script.save()

    response = {
        'id': pig_script.id,
        'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
Example #9
  def test_check_automated_hcatalogs_credentials(self):
    reset = SECURITY_ENABLED.set_for_testing(True)

    try:
      api = get(None, None, self.user)
      pig_script = self.create_script()
      pig_script.update_from_dict({
          'script':"""
            a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
            dump a;

            STORE raw_data INTO 'students' USING
            org.apache.pig.backend.hadoop.hbase.HBaseStorage
            org.apache.pig.backend.hadoop.hbase.HBaseStorage (
            'info:first_name info:last_name info:age info:gpa info:part');
            raw_data = LOAD 'students' USING PigStorage( ' ' ) AS (
            id: chararray,
            first_name: chararray,
            last_name: chararray,
            age: int,
            gpa: float,
            part: int );
      """})
      pig_script.save()

      wf = api._create_workflow(pig_script, '[]')
      start_link = wf.start.get_link()
      pig_action = start_link.child
      assert_equal([{u'name': u'hcat', u'value': True}, {u'name': u'hbase', u'value': True}], pig_action.credentials)
    finally:
      reset()
Example #10
def run(request):
  if request.method != 'POST':
    raise PopupException(_('POST request required.'))

  attrs = {
    'id': request.POST.get('id'),
    'name': request.POST.get('name'),
    'script': request.POST.get('script'),
    'user': request.user,
    'parameters': json.loads(request.POST.get('parameters')),
    'resources': json.loads(request.POST.get('resources')),
    'hadoopProperties': json.loads(request.POST.get('hadoopProperties')),
    'is_design': False
  }

  pig_script = create_or_update_script(**attrs)

  params = request.POST.get('submissionVariables')
  oozie_id = api.get(request.fs, request.jt, request.user).submit(pig_script, params)

  pig_script.update_from_dict({'job_id': oozie_id})
  pig_script.save()

  response = {
    'id': pig_script.id,
    'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
  }

  return HttpResponse(json.dumps(response), content_type="text/plain")
Example #11
def dashboard(request):
  pig_api = api.get(request.fs, request.user)

  jobs = pig_api.get_jobs()
  hue_jobs = PigScript.objects.filter(owner=request.user)
  massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

  return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
Example #12
def dashboard(request):
    pig_api = api.get(request.fs, request.jt, request.user)

    jobs = pig_api.get_jobs()
    hue_jobs = Document.objects.available(PigScript, request.user)
    massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

    return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
Example #13
def dashboard(request):
  pig_api = api.get(request.fs, request.jt, request.user)

  jobs = pig_api.get_jobs()
  hue_jobs = Document.objects.available(PigScript, request.user)
  massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

  return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
Example #14
  def get_log(self, notebook, snippet, startFrom=0, size=None):
    job_id = snippet['result']['handle']['id']
    request = MockRequest(self.user, self.fs, self.jt)

    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(request, oozie_workflow)

    return logs
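MockRequest only has to look enough like a Django request for the permission check and the log call; a minimal stand-in could be (an assumption, not the project's actual class):

  class MockRequest(object):
      # Duck-typed request carrying just the attributes the callees read.
      def __init__(self, user, fs, jt):
          self.user = user
          self.fs = fs
          self.jt = jt
          self.GET = {'format': 'python'}  # assumed, mirroring the watch views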
Example #15
def dashboard(request):
  pig_api = api.get(request.fs, request.jt, request.user)

  jobs = pig_api.get_jobs()
  hue_jobs = Document.objects.available(PigScript, request.user, with_history=True)
  massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

  return JsonResponse(massaged_jobs, safe=False)
Example #16
def dashboard(request):
  pig_api = api.get(request.fs, request.user)

  jobs = pig_api.get_jobs()
  hue_jobs = PigScript.objects.filter(owner=request.user)
  massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

  return HttpResponse(json.dumps(massaged_jobs), mimetype="application/json")
Example #17
def dashboard(request):
  pig_api = api.get(request.fs, request.jt, request.user)

  jobs = pig_api.get_jobs()
  hue_jobs = Document.objects.available(PigScript, request.user, with_history=True)
  massaged_jobs = pig_api.massaged_jobs_for_json(request, jobs, hue_jobs)

  return JsonResponse(massaged_jobs, safe=False)
Example #18
  def _get_output(self, oozie_workflow):
    q = QueryDict(self.request.GET, mutable=True)
    q['format'] = 'python'  # Hack to trigger the right section in single_task_attempt_logs
    self.request.GET = q

    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(self.request, oozie_workflow)

    return logs, workflow_actions, is_really_done
Example #19
    def _get_output(self, oozie_workflow):
        q = QueryDict(self.request.GET, mutable=True)
        q['format'] = 'python'  # Hack to trigger the right section in single_task_attempt_logs
        self.request.GET = q

        logs, workflow_actions, is_really_done = api.get(
            self.fs, self.jt, self.user).get_log(self.request, oozie_workflow)

        return logs, workflow_actions, is_really_done
Example #20
  def _get_log_output(self, oozie_workflow):
    log_output = ''

    q = QueryDict(self.request.GET, mutable=True)
    q['format'] = 'python'  # Hack to trigger the right section in single_task_attempt_logs
    self.request.GET = q

    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(self.request, oozie_workflow,
                                                                                          make_links=False)

    if len(logs) > 0:
      log_output = list(logs.values())[0]
      if log_output.startswith('Unable to locate'):
        LOG.debug('Failed to get job attempt logs, possibly due to YARN archiving job to JHS. Will sleep and try again.')
        time.sleep(5.0)
        logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(self.request, oozie_workflow,
                                                                                              make_links=False)
        if len(logs) > 0:
          log_output = list(logs.values())[0]

    return log_output, workflow_actions, is_really_done
Example #21
File: views.py Project: wasimf/hue
def run(request):
  pig_script = create_or_update_script(request.POST.get('id'), request.POST.get('name'), request.POST.get('script'), request.user, is_design=False)

  params = {}
  oozie_id = api.get(request.fs, request.user).submit(pig_script, params)

  pig_script.update_from_dict({'job_id': oozie_id})
  pig_script.save()

  response = {
    'id': pig_script.id,
    'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
  }

  return HttpResponse(json.dumps(response), content_type="text/plain")
Example #22
  def test_check_hcatalogs_sharelib(self):
    api = get(None, None, self.user)
    pig_script = self.create_script()

    # Regular
    wf = api._create_workflow(pig_script, '[]')
    assert_false({'name': u'oozie.action.sharelib.for.pig', 'value': u'pig,hcatalog'} in wf.find_all_parameters(), wf.find_all_parameters())

    # With HCat
    pig_script.update_from_dict({
        'script':"""
           a = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader();
           dump a;
    """})
    pig_script.save()

    wf = api._create_workflow(pig_script, '[]')
    assert_true({'name': u'oozie.action.sharelib.for.pig', 'value': u'pig,hcatalog'} in wf.find_all_parameters(), wf.find_all_parameters())
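The test implies that _create_workflow scans the script text and adds the oozie.action.sharelib.for.pig parameter when HCatalog is referenced. A guess at how such detection could work (not the project's actual code):

  def uses_hcatalog(script):
      # Crude substring check; the test's trigger is an HCatLoader reference.
      return 'org.apache.hcatalog' in script or 'org.apache.hive.hcatalog' in script

  # if uses_hcatalog(script):
  #     parameters.append({'name': 'oozie.action.sharelib.for.pig', 'value': 'pig,hcatalog'})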
Example #23
  def execute(self, notebook, snippet):

    attrs = {
      'script': snippet['statement'],
      'name': snippet['properties'].get('name', 'Pig Snippet'),
      'parameters': snippet['properties'].get('parameters'),
      'resources': snippet['properties'].get('resources'),
      'hadoopProperties': snippet['properties'].get('hadoopProperties')
    }

    pig_script = PigScript2(attrs)

    params = json.dumps([])
    oozie_id = api.get(self.fs, self.jt, self.user).submit(pig_script, params)

    return {
      'id': oozie_id,
      'watchUrl': reverse('pig:watch', kwargs={'job_id': oozie_id}) + '?format=python'
    }
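A sketch of the snippet dict execute() consumes, built from the exact keys it reads (values are illustrative):

  snippet = {
      'statement': "a = LOAD 'sample_07'; dump a;",
      'properties': {
          'name': 'Pig Snippet',
          'parameters': [],
          'resources': [],
          'hadoopProperties': []
      }
  }
  # handle = connector.execute({}, snippet)
  # handle['id'] is the Oozie workflow id; handle['watchUrl'] points at pig:watch.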
Example #24
  def check_status(self, notebook, snippet):
    job_id = snippet['result']['handle']['id']
    request = MockRequest(self.user, self.fs, self.jt)

    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(self.fs, self.jt, self.user).get_log(request, oozie_workflow)

    if is_really_done and not oozie_workflow.is_running():
      if oozie_workflow.status in ('KILLED', 'FAILED'):
        raise QueryError(_('The script failed to run and was stopped'))
      status = 'available'
    elif oozie_workflow.is_running():
      status = 'running'
    else:
      status = 'failed'

    return {
        'status': status
    }
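Since check_status() collapses the Oozie workflow state into 'running', 'available' or 'failed', a caller can simply poll it; a minimal loop (sketch, with an arbitrary poll interval, reusing connector/notebook/snippet from the example above):

  import time

  status = 'running'
  while status == 'running':
      status = connector.check_status(notebook, snippet)['status']
      if status == 'running':
          time.sleep(5)  # arbitrary interval
  # status is now 'available' or 'failed'; KILLED/FAILED raises QueryError instead.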
Example #25
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(
        request.fs, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        'job_id': oozie_workflow.id,
        'status': oozie_workflow.status,
        'progress': oozie_workflow.get_progress(),
        'isRunning': oozie_workflow.is_running(),
        'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': oozie_workflow.id, 'action': 'kill'}),
        'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={
            'job_id': oozie_workflow.id,
            'app_path': urllib.parse.quote(oozie_workflow.appPath.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
        }),
        'actions': workflow_actions
    }

    response = {
        'workflow': workflow,
        'logs': logs,
        'isReallyDone': is_really_done,
        'output': hdfs_link(output)
    }

    return JsonResponse(response, content_type="text/plain")
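The watchUrl returned by run() points at this view, so a client can poll it until the workflow stops. A sketch using the requests library; the session is assumed to be already authenticated:

  import time

  def poll_watch(watch_url, session):
      # session: an authenticated requests.Session (authentication not shown).
      # watch() reports isRunning/progress until the job finishes.
      while True:
          data = session.get(watch_url).json()
          if not data['workflow']['isRunning']:
              return data['logs'], data['output']
          time.sleep(5)  # arbitrary interval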
Example #26
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions = api.get(request.fs, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        "job_id": oozie_workflow.id,
        "status": oozie_workflow.status,
        "progress": oozie_workflow.get_progress(),
        "isRunning": oozie_workflow.is_running(),
        "killUrl": reverse("oozie:manage_oozie_jobs", kwargs={"job_id": oozie_workflow.id, "action": "kill"}),
        "rerunUrl": reverse(
            "oozie:rerun_oozie_job", kwargs={"job_id": oozie_workflow.id, "app_path": oozie_workflow.appPath}
        ),
        "actions": workflow_actions,
    }

    response = {"workflow": workflow, "logs": logs, "output": hdfs_link(output)}

    return HttpResponse(json.dumps(response), content_type="text/plain")
Example #27
def watch(request, job_id):
    oozie_workflow = check_job_access_permission(request, job_id)
    logs, workflow_actions, is_really_done = api.get(
        request.fs, request.jt, request.user).get_log(request, oozie_workflow)
    output = get_workflow_output(oozie_workflow, request.fs)

    workflow = {
        'job_id': oozie_workflow.id,
        'status': oozie_workflow.status,
        'progress': oozie_workflow.get_progress(),
        'isRunning': oozie_workflow.is_running(),
        'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': oozie_workflow.id, 'action': 'kill'}),
        'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={'job_id': oozie_workflow.id, 'app_path': oozie_workflow.appPath}),
        'actions': workflow_actions
    }

    response = {
        'workflow': workflow,
        'logs': logs,
        'isReallyDone': is_really_done,
        'output': hdfs_link(output)
    }

    return HttpResponse(json.dumps(response), content_type="text/plain")
Example #28
def watch(request, job_id):
  oozie_workflow = check_job_access_permission(request, job_id)
  logs, workflow_actions = api.get(request.jt, request.jt, request.user).get_log(request, oozie_workflow)
  output = get_workflow_output(oozie_workflow, request.fs)

  workflow = {
    'job_id': oozie_workflow.id,
    'status': oozie_workflow.status,
    'progress': oozie_workflow.get_progress(),
    'isRunning': oozie_workflow.is_running(),
    'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': oozie_workflow.id, 'action': 'kill'}),
    'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={'job_id': oozie_workflow.id, 'app_path': oozie_workflow.appPath}),
    'actions': workflow_actions
  }

  response = {
    'workflow': workflow,
    'logs': logs,
    'output': hdfs_link(output)
  }

  return HttpResponse(json.dumps(response), content_type="text/plain")
Example #29
  def parse_oozie_logs(self):
    api = get(None, None, self.user)

    assert_equal(
'''Run pig script using PigRunner.run() for Pig version 0.8+
  Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported)
  compiled Jun 30 2013, 03:40:22

  Run pig script using PigRunner.run() for Pig version 0.8+
  2013-10-09 17:30:39,709 [main] INFO  org.apache.pig.Main  - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
  2013-10-09 17:30:39,709 [main] INFO  org.apache.pig.Main  - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
  2013-10-09 17:30:39,710 [main] INFO  org.apache.pig.Main  - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
  2013-10-09 17:30:39,710 [main] INFO  org.apache.pig.Main  - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
  2013-10-09 17:30:39,739 [main] WARN  org.apache.hadoop.conf.Configuration  - dfs.df.interval is deprecated. Instead, use fs.df.interval
  2013-10-09 17:30:39,739 [main] WARN  org.apache.hadoop.conf.Configuration  - mapred.task.tracker.http.address is deprecated. Instead, use mapreduce.tasktracker.http.address
  2013-10-09 17:30:39,833 [main] INFO  org.apache.pig.backend.hadoop.executionengine.HExecutionEngine  - Connecting to map-reduce job tracker at: localhost:8032
  hdfs://localhost:8020/user/romain/.Trash  <dir>
  hdfs://localhost:8020/user/romain/examples  <dir>
  hdfs://localhost:8020/user/romain/tweets  <dir>
  hdfs://localhost:8020/user/romain/wordcount.jar<r 1>  3165
  hdfs://localhost:8020/user/romain/words  <dir>
  hdfs://localhost:8020/user/romain/yelp  <dir>''', api._match_logs({'logs': [None, OOZIE_LOGS]}))
Example #30
File: tests.py Project: lorelib/hue
  def parse_oozie_logs(self):
    api = get(None, None, self.user)

    assert_equal(
'''Run pig script using PigRunner.run() for Pig version 0.8+
  Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported)
  compiled Jun 30 2013, 03:40:22

  Run pig script using PigRunner.run() for Pig version 0.8+
  2013-10-09 17:30:39,709 [main] INFO  org.apache.pig.Main  - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
  2013-10-09 17:30:39,709 [main] INFO  org.apache.pig.Main  - Apache Pig version 0.11.0-cdh4.4.0-SNAPSHOT (rexported) compiled Jun 30 2013, 03:40:22
  2013-10-09 17:30:39,710 [main] INFO  org.apache.pig.Main  - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
  2013-10-09 17:30:39,710 [main] INFO  org.apache.pig.Main  - Logging error messages to: /var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/romain/appcache/application_1381360805876_0001/container_1381360805876_0001_01_000002/pig-job_1381360805876_0001.log
  2013-10-09 17:30:39,739 [main] WARN  org.apache.hadoop.conf.Configuration  - dfs.df.interval is deprecated. Instead, use fs.df.interval
  2013-10-09 17:30:39,739 [main] WARN  org.apache.hadoop.conf.Configuration  - mapred.task.tracker.http.address is deprecated. Instead, use mapreduce.tasktracker.http.address
  2013-10-09 17:30:39,833 [main] INFO  org.apache.pig.backend.hadoop.executionengine.HExecutionEngine  - Connecting to map-reduce job tracker at: localhost:8032
  hdfs://localhost:8020/user/romain/.Trash  <dir>
  hdfs://localhost:8020/user/romain/examples  <dir>
  hdfs://localhost:8020/user/romain/tweets  <dir>
  hdfs://localhost:8020/user/romain/wordcount.jar<r 1>  3165
  hdfs://localhost:8020/user/romain/words  <dir>
  hdfs://localhost:8020/user/romain/yelp  <dir>''', api._match_logs({'logs': [None, OOZIE_LOGS]}))
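_match_logs evidently extracts the Pig section from the raw Oozie launcher log; purely as an illustration, a re-implementation could anchor on the PigRunner banner that the expected output starts with (a guess at the mechanism, not the project's code):

  PIG_BANNER = 'Run pig script using PigRunner.run() for Pig version 0.8+'

  def match_pig_logs(payload):
      # Keep everything from the first PigRunner banner onward.
      raw = payload['logs'][1] or ''
      start = raw.find(PIG_BANNER)
      return raw[start:] if start != -1 else ''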