Ejemplo n.º 1
0
class ResourceManagerApi(object):
  """
  REST client for the ``cluster/apps`` endpoints served under ``<oozie_url>/ws/<_API_VERSION>``.

  Requests are delegated to a ``Resource`` built on top of an ``HttpClient`` and
  ask for a JSON answer through the ``Accept`` header.
  """

  def __init__(self, oozie_url, security_enabled=False):
    """
    oozie_url: base URL of the server; ``ws`` and ``_API_VERSION`` are appended to it.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    """
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled

    if security_enabled:
      http_client.set_kerberos_auth()

  def __str__(self):
    return "ResourceManagerApi at %s" % self._url

  @property
  def url(self):
    """Full base URL every request is issued against."""
    return self._url

  @property
  def security_enabled(self):
    """The ``security_enabled`` flag given to the constructor."""
    return self._security_enabled

  def _get_json(self, relpath, **request_args):
    """GET ``relpath`` under the base URL, asking for a JSON response."""
    return self._root.get(relpath, headers={'Accept': _JSON_CONTENT_TYPE}, **request_args)

  def apps(self, **kwargs):
    """GET ``cluster/apps``; ``kwargs`` are sent as query parameters."""
    return self._get_json('cluster/apps', params=kwargs)

  def app(self, app_id):
    """GET ``cluster/apps/<app_id>``."""
    return self._get_json('cluster/apps/%s' % (app_id,))
Ejemplo n.º 2
0
class ResourceManagerApi(object):
  """
  REST client for the ``cluster`` endpoints served under ``<oozie_url>/ws/<_API_VERSION>``.

  Read calls ask for JSON through the ``Accept`` header; ``kill`` sends a JSON body.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    oozie_url: base URL of the server; ``ws`` and ``_API_VERSION`` are appended to it.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    ssl_cert_ca_verify: when true AND security is enabled, ``set_verify(True)`` is called
      on the HTTP client; otherwise the client's verify setting is left untouched.
    """
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if not security_enabled:
      return

    http_client.set_kerberos_auth()
    if ssl_cert_ca_verify:
      http_client.set_verify(True)

  def __str__(self):
    return "ResourceManagerApi at %s" % self._url

  @property
  def url(self):
    """Full base URL every request is issued against."""
    return self._url

  @property
  def security_enabled(self):
    """The ``security_enabled`` flag given to the constructor."""
    return self._security_enabled

  def _get_json(self, relpath, **request_args):
    """GET ``relpath`` under the base URL, asking for a JSON response."""
    return self._root.get(relpath, headers={'Accept': _JSON_CONTENT_TYPE}, **request_args)

  def cluster(self, **kwargs):
    """GET ``cluster``; ``kwargs`` are sent as query parameters."""
    return self._get_json('cluster', params=kwargs)

  def apps(self, **kwargs):
    """GET ``cluster/apps``; ``kwargs`` are sent as query parameters."""
    return self._get_json('cluster/apps', params=kwargs)

  def app(self, app_id):
    """GET ``cluster/apps/<app_id>``."""
    return self._get_json('cluster/apps/%s' % (app_id,))

  def kill(self, app_id):
    """PUT ``{"state": "KILLED"}`` to ``cluster/apps/<app_id>/state``."""
    state_path = 'cluster/apps/%s/state' % (app_id,)
    payload = json.dumps({'state': 'KILLED'})
    return self._root.put(state_path, data=payload, contenttype=_JSON_CONTENT_TYPE)
Ejemplo n.º 3
0
class ResourceManagerApi(object):
  """
  REST client for the ``node/containers`` endpoints served under ``<oozie_url>/ws/<_API_VERSION>``.

  NOTE(review): ``__str__`` labels instances "NodeManagerApi" although the class is
  named ResourceManagerApi -- confirm which name is intended.
  """

  def __init__(self, oozie_url, security_enabled=False):
    """
    oozie_url: base URL of the server; ``ws`` and ``_API_VERSION`` are appended to it.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    """
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled

    if security_enabled:
      http_client.set_kerberos_auth()

  def __str__(self):
    return "NodeManagerApi at %s" % self._url

  @property
  def url(self):
    """Full base URL every request is issued against."""
    return self._url

  @property
  def security_enabled(self):
    """The ``security_enabled`` flag given to the constructor."""
    return self._security_enabled

  def _get_json(self, relpath):
    """GET ``relpath`` under the base URL, asking for a JSON response."""
    return self._root.get(relpath, headers={'Accept': _JSON_CONTENT_TYPE})

  def containers(self):
    """GET ``node/containers``."""
    return self._get_json('node/containers')

  def container(self, container_id):
    """GET ``node/containers/<container_id>``."""
    return self._get_json('node/containers/%s' % (container_id,))
Ejemplo n.º 4
0
class ResourceManagerApi(object):
    """
    REST client for the ``node/containers`` endpoints served under ``<oozie_url>/ws/<_API_VERSION>``.

    NOTE(review): ``__str__`` labels instances "NodeManagerApi" although the class is
    named ResourceManagerApi -- confirm which name is intended.
    """

    def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
        """
        oozie_url: base URL of the server; ``ws`` and ``_API_VERSION`` are appended to it.
        security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
        ssl_cert_ca_verify: always forwarded to the HTTP client's ``set_verify``.
        """
        self._url = posixpath.join(oozie_url, "ws", _API_VERSION)
        http_client = HttpClient(self._url, logger=LOG)
        self._client = http_client
        self._root = Resource(http_client)
        self._security_enabled = security_enabled

        if security_enabled:
            http_client.set_kerberos_auth()

        http_client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "NodeManagerApi at %s" % self._url

    @property
    def url(self):
        """Full base URL every request is issued against."""
        return self._url

    @property
    def security_enabled(self):
        """The ``security_enabled`` flag given to the constructor."""
        return self._security_enabled

    def _get_json(self, relpath):
        """GET ``relpath`` under the base URL, asking for a JSON response."""
        return self._root.get(relpath, headers={"Accept": _JSON_CONTENT_TYPE})

    def containers(self):
        """GET ``node/containers``."""
        return self._get_json("node/containers")

    def container(self, container_id):
        """GET ``node/containers/<container_id>``."""
        return self._get_json("node/containers/%s" % (container_id,))
Ejemplo n.º 5
0
class JobServerApi(object):
    """
    REST client for a job server exposing ``healthz``, ``jobs``, ``contexts`` and ``jars``.

    The current user name is kept in thread-local storage, so each thread sees
    only the value it stored through ``setuser``.
    """

    def __init__(self, oozie_url):
        """oozie_url: base URL of the job server; every request is relative to it."""
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL every request is issued against."""
        return self._url

    @property
    def security_enabled(self):
        """Always False: the constructor never enables security."""
        return self._security_enabled

    @property
    def user(self):
        """User name stored by ``setuser`` in the current thread.

        Raises AttributeError when ``setuser`` has not been called in this thread.
        """
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` for the current thread.

        Accepts either an object with a ``username`` attribute (the attribute is
        stored) or any other value, which is stored as is.
        """
        if hasattr(user, "username"):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def get_status(self, **kwargs):
        """GET ``healthz`` asking for a text response; ``kwargs`` are query parameters."""
        return self._root.get("healthz", params=kwargs, headers={"Accept": _TEXT_CONTENT_TYPE})

    def submit_job(self, appName, classPath, data, context=None, sync=False):
        """POST ``data`` to ``jobs``.

        ``appName``, ``classPath`` and ``sync`` are always sent as query
        parameters; ``context`` is added only when truthy.
        """
        params = {"appName": appName, "classPath": classPath, "sync": sync}
        if context:
            params["context"] = context
        # The path has no placeholders: it used to be written `"jobs" % params`,
        # which formatted nothing and only obscured the call.
        return self._root.post("jobs", params=params, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def job(self, job_id):
        """GET ``jobs/<job_id>`` as JSON."""
        return self._root.get("jobs/%s" % job_id, headers={"Accept": _JSON_CONTENT_TYPE})

    def jobs(self, **kwargs):
        """GET ``jobs`` as JSON; ``kwargs`` are query parameters."""
        return self._root.get("jobs", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

    def create_context(self, name, **kwargs):
        """POST to ``contexts/<name>``; ``kwargs`` are query parameters."""
        return self._root.post("contexts/%s" % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

    def contexts(self, **kwargs):
        """GET ``contexts`` as JSON; ``kwargs`` are query parameters."""
        return self._root.get("contexts", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

    def delete_context(self, name, **kwargs):
        """DELETE ``contexts/<name>``.

        NOTE(review): ``kwargs`` are accepted but not forwarded -- confirm
        whether they should be sent as query parameters.
        """
        return self._root.delete("contexts/%s" % name)

    def upload_jar(self, app_name, data):
        """POST ``data`` to ``jars/<app_name>``."""
        return self._root.post("jars/%s" % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def jars(self, **kwargs):
        """GET ``jars`` as JSON; ``kwargs`` are query parameters."""
        return self._root.get("jars", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})
Ejemplo n.º 6
0
class JobServerApi(object):
  """
  REST client for the ``sessions`` endpoints of a job server.

  The current user name is kept in thread-local storage, so each thread sees
  only the value it stored through ``setuser``.
  """

  def __init__(self, oozie_url):
    """oozie_url: base URL of the server; every request is relative to it."""
    self._url = posixpath.join(oozie_url)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = False
    self._thread_local = threading.local()

  def __str__(self):
    return "JobServerApi at %s" % self._url

  @property
  def url(self):
    """Base URL every request is issued against."""
    return self._url

  @property
  def security_enabled(self):
    """Always False: the constructor never enables security."""
    return self._security_enabled

  @property
  def user(self):
    """User name stored by ``setuser`` in the current thread."""
    return self._thread_local.user

  def setuser(self, user):
    """Store ``user.username`` when that attribute exists, otherwise ``user`` itself."""
    name = user.username if hasattr(user, 'username') else user
    self._thread_local.user = name

  def get_status(self):
    """GET ``sessions``."""
    return self._root.get('sessions')

  def create_session(self, **kwargs):
    """POST ``kwargs`` as a JSON document to ``sessions``."""
    body = json.dumps(kwargs)
    return self._root.post('sessions', data=body, contenttype='application/json')

  def get_session(self, uuid):
    """GET ``sessions/<uuid>``."""
    session_path = 'sessions/%s' % uuid
    return self._root.get(session_path)

  def submit_statement(self, uuid, statement):
    """POST ``{"code": statement}`` to ``sessions/<uuid>/statements``."""
    body = json.dumps({'code': statement})
    target = 'sessions/%s/statements' % uuid
    return self._root.post(target, data=body, contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    """POST ``{"code": statement}`` to ``sessions/<uuid>/inspect``."""
    body = json.dumps({'code': statement})
    target = 'sessions/%s/inspect' % uuid
    return self._root.post(target, data=body, contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    """GET ``sessions/<session>/statements/<statement>``."""
    statement_path = 'sessions/%s/statements/%s' % (session, statement)
    return self._root.get(statement_path)

  def cancel(self, session):
    """POST to ``sessions/<session>/interrupt``."""
    interrupt_path = 'sessions/%s/interrupt' % session
    return self._root.post(interrupt_path)
Ejemplo n.º 7
0
class MapreduceApi(object):
  """
  REST client for the ``mapreduce/jobs`` endpoints reached through ``<oozie_url>/proxy``.

  Every request path has the form
  ``<app_id>/ws/<_API_VERSION>/mapreduce/jobs/<job_id>[/...]`` where ``app_id``
  is ``job_id`` with every ``job`` replaced by ``application``.
  """

  def __init__(self, oozie_url, security_enabled=False):
    """
    oozie_url: base URL; ``proxy`` is appended to it.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    """
    self._url = posixpath.join(oozie_url, 'proxy')
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled

    if security_enabled:
      http_client.set_kerberos_auth()

  def __str__(self):
    return "MapreduceApi at %s" % self._url

  @property
  def url(self):
    """Base URL every request is issued against."""
    return self._url

  def _get_job_resource(self, job_id, suffix='', normalize_job_id=False):
    """
    GET ``<app_id>/ws/<version>/mapreduce/jobs/<job_id><suffix>`` as JSON.

    ``app_id`` is always derived from the ``job_id`` as given. With
    ``normalize_job_id`` the job id used in the path additionally has every
    ``application`` replaced by ``job``.
    """
    app_id = job_id.replace('job', 'application')
    if normalize_job_id:
      job_id = job_id.replace('application', 'job')
    base = '%s/ws/%s/mapreduce/jobs/%s' % (app_id, _API_VERSION, job_id)
    return self._root.get(base + suffix, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    """GET the job itself; ``user`` is accepted but not used."""
    return self._get_job_resource(job_id)

  def counters(self, job_id):
    """GET ``.../jobs/<job_id>/counters``."""
    return self._get_job_resource(job_id, '/counters')

  def tasks(self, job_id):
    """GET ``.../jobs/<job_id>/tasks``."""
    return self._get_job_resource(job_id, '/tasks')

  def job_attempts(self, job_id):
    """GET ``.../jobs/<job_id>/jobattempts``."""
    return self._get_job_resource(job_id, '/jobattempts')

  def conf(self, job_id):
    """GET ``.../jobs/<job_id>/conf``."""
    return self._get_job_resource(job_id, '/conf')

  def task(self, job_id, task_id):
    """GET ``.../jobs/<job_id>/tasks/<task_id>``."""
    return self._get_job_resource(job_id, '/tasks/%s' % (task_id,))

  def task_counters(self, job_id, task_id):
    """GET ``.../tasks/<task_id>/counters``; the job id in the path is normalized to the ``job`` form."""
    return self._get_job_resource(job_id, '/tasks/%s/counters' % (task_id,), normalize_job_id=True)

  def task_attempts(self, job_id, task_id):
    """GET ``.../tasks/<task_id>/attempts``."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts' % (task_id,))

  def task_attempt(self, job_id, task_id, attempt_id):
    """GET ``.../tasks/<task_id>/attempts/<attempt_id>``; the job id in the path is normalized to the ``job`` form."""
    suffix = '/tasks/%s/attempts/%s' % (task_id, attempt_id)
    return self._get_job_resource(job_id, suffix, normalize_job_id=True)
Ejemplo n.º 8
0
class HistoryServerApi(object):
  """
  REST client for the ``mapreduce/jobs`` endpoints served under
  ``<oozie_url>/ws/<_API_VERSION>/history``.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    oozie_url: base URL; ``ws/<_API_VERSION>/history`` is appended to it.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    ssl_cert_ca_verify: always forwarded to the HTTP client's ``set_verify``.
    """
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled

    if security_enabled:
      http_client.set_kerberos_auth()

    http_client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % self._url

  @property
  def url(self):
    """Base URL every request is issued against."""
    return self._url

  def _get_job_resource(self, job_id, suffix=''):
    """GET ``mapreduce/jobs/<job_id><suffix>`` as JSON."""
    path = 'mapreduce/jobs/%s%s' % (job_id, suffix)
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    """GET the job itself; ``user`` is accepted but not used."""
    return self._get_job_resource(job_id)

  def counters(self, job_id):
    """GET ``mapreduce/jobs/<job_id>/counters``."""
    return self._get_job_resource(job_id, '/counters')

  def conf(self, job_id):
    """GET ``mapreduce/jobs/<job_id>/conf``."""
    return self._get_job_resource(job_id, '/conf')

  def job_attempts(self, job_id):
    """GET ``mapreduce/jobs/<job_id>/jobattempts``."""
    return self._get_job_resource(job_id, '/jobattempts')

  def tasks(self, job_id):
    """GET ``mapreduce/jobs/<job_id>/tasks``."""
    return self._get_job_resource(job_id, '/tasks')

  def task(self, job_id, task_id):
    """GET ``mapreduce/jobs/<job_id>/tasks/<task_id>``."""
    return self._get_job_resource(job_id, '/tasks/%s' % (task_id,))

  def task_attempts(self, job_id, task_id):
    """GET ``.../tasks/<task_id>/attempts``."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts' % (task_id,))

  def task_counters(self, job_id, task_id):
    """GET ``.../tasks/<task_id>/counters``; every ``application`` in ``job_id`` is first replaced by ``job``."""
    normalized_id = job_id.replace('application', 'job')
    return self._get_job_resource(normalized_id, '/tasks/%s/counters' % (task_id,))

  def task_attempt(self, job_id, task_id, attempt_id):
    """GET ``.../tasks/<task_id>/attempts/<attempt_id>``."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts/%s' % (task_id, attempt_id))

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    """GET ``.../tasks/<task_id>/attempts/<attempt_id>/counters``."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts/%s/counters' % (task_id, attempt_id))
Ejemplo n.º 9
0
class SolrApi(object):
  """
  Minimal client for the Solr ``select`` handler.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    """solr_url: base URL of the Solr server; request paths are relative to it."""
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """
    Run ``solr_query`` against ``<collection>/select`` and return the decoded JSON.

    solr_query: mapping with the keys ``q``, ``rows``, ``start``, ``fq`` and
      ``collection``. An empty ``q`` falls back to ``EMPTY_QUERY.get()``;
      ``fq`` is a ``|``-separated list of filter queries, empty entries are skipped.
    hue_core: object whose ``get_query()`` returns extra ``(name, value)``
      pairs appended to the request parameters.

    Raises PopupException when the HTTP layer raises RestException.
    """
    try:
      params = (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )

      params += hue_core.get_query()

      # One `fq` parameter per non-empty filter.
      params += tuple(('fq', fq) for fq in solr_query['fq'].split('|') if fq)

      response = self._root.get('%(collection)s/select' % solr_query, params)
      return json.loads(response)
    except RestException as e:  # `as` form is valid on Python 2.6+ and Python 3
      raise PopupException('Error while accessing Solr: %s' % e)
Ejemplo n.º 10
0
Archivo: api.py Proyecto: Roxasora/hue
class SolrApi(object):
  """
  Minimal client for the Solr ``select`` handler.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    """
    solr_url: base URL of the Solr server; request paths are relative to it.

    Kerberos authentication is enabled on the client when ``SECURITY_ENABLED.get()`` is true.
    """
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    if SECURITY_ENABLED.get():
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """
    Run ``solr_query`` against ``<collection>/select`` and return the result as a dict.

    solr_query: mapping with the keys ``q``, ``rows``, ``start``, ``fq`` and
      ``collection``. An empty ``q`` falls back to ``EMPTY_QUERY.get()``;
      ``fq`` is a ``|``-separated list of filter queries, empty entries are skipped.
    hue_core: object whose ``get_query(solr_query)`` returns extra
      ``(name, value)`` pairs appended to the request parameters.

    Raises PopupException when the HTTP layer raises RestException.
    """
    try:
      params = (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )

      params += hue_core.get_query(solr_query)

      # One `fq` parameter per non-empty filter.
      params += tuple(('fq', fq) for fq in solr_query['fq'].split('|') if fq)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      # isinstance (rather than an exact type comparison) so that dict
      # subclasses are returned as is instead of being fed to json.loads.
      if not isinstance(response, dict):
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    except RestException as e:  # `as` form is valid on Python 2.6+ and Python 3
      raise PopupException('Error while accessing Solr: %s' % e)
Ejemplo n.º 11
0
    def get_task_log(self, offset=0):
        """
        Fetch the stdout, stderr and syslog of this attempt.

        The log URL is taken from the last entry of the job's ``jobAttempt``
        list and rewritten to point at this attempt's container, node host and id.

        offset: sent as the ``start`` request parameter when ``int(offset) >= 0``.

        Returns a list of three strings in the order stdout, stderr, syslog.
        A log that cannot be retrieved is replaced by an error message.
        """
        logs = []
        attempt = self.task.job.job_attempts["jobAttempt"][-1]
        log_link = attempt["logsLink"]
        # Get MR task logs
        if self.assignedContainerId:
            log_link = log_link.replace(attempt["containerId"], self.assignedContainerId)
        if hasattr(self, "nodeHttpAddress"):
            # Only the host part (before ':') is swapped.
            log_link = log_link.replace(attempt["nodeHttpAddress"].split(":")[0], self.nodeHttpAddress.split(":")[0])

        for name in ("stdout", "stderr", "syslog"):
            link = "/%s/" % name
            params = {}
            if int(offset) >= 0:
                params["start"] = offset

            try:
                log_link = re.sub("job_[^/]+", self.id, log_link)
                root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
                response = root.get(link, params=params)
                log = html.fromstring(response).xpath("/html/body/table/tbody/tr/td[2]")[0].text_content()
            except Exception as e:  # best effort: report the failure in place of the log
                log = _("Failed to retrieve log: %s") % e

            logs.append(log)

        # The collected logs used to be dropped because nothing was returned.
        return logs
Ejemplo n.º 12
0
  def get_task_log(self, offset=0):
    """
    Fetch the stdout, stderr and syslog of this attempt.

    The log URL is taken from the last entry of the job's ``jobAttempt``
    list and rewritten to point at this attempt's container, node host and id.

    offset: sent as the ``start`` request parameter when ``int(offset) >= 0``.

    Returns a list of three strings in the order stdout, stderr, syslog.
    A log that cannot be retrieved is replaced by an error message.
    """
    logs = []
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']
    # Get MR task logs
    if self.assignedContainerId:
      log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
    if hasattr(self, 'nodeHttpAddress'):
      # Only the host part (before ':') is swapped.
      log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

    for name in ('stdout', 'stderr', 'syslog'):
      link = '/%s/' % name
      params = {}
      if int(offset) >= 0:
        params['start'] = offset

      try:
        log_link = re.sub('job_[^/]+', self.id, log_link)
        root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
        response = root.get(link, params=params)
        log = html.fromstring(response).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
      except Exception as e:  # best effort: report the failure in place of the log
        log = _('Failed to retrieve log: %s') % e

      logs.append(log)

    # The collected logs used to be dropped because nothing was returned.
    return logs
Ejemplo n.º 13
0
class SolrApi(object):
    """
    Minimal client for the Solr ``select`` handler.

    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """

    def __init__(self, solr_url):
        """solr_url: base URL of the Solr server; request paths are relative to it."""
        self._url = solr_url
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

    def query(self, solr_query, hue_core):
        """
        Run ``solr_query`` against ``<collection>/select`` and return the result as a dict.

        solr_query: mapping with the keys ``q``, ``rows``, ``start``, ``fq`` and
          ``collection``. An empty ``q`` falls back to ``EMPTY_QUERY.get()``;
          ``fq`` is a ``|``-separated list of filter queries, empty entries are skipped.
        hue_core: object whose ``get_query(solr_query)`` returns extra
          ``(name, value)`` pairs appended to the request parameters.

        Raises PopupException when the HTTP layer raises RestException.
        """
        try:
            params = (
                ("q", solr_query["q"] or EMPTY_QUERY.get()),
                ("wt", "json"),
                ("rows", solr_query["rows"]),
                ("start", solr_query["start"]),
            )

            params += hue_core.get_query(solr_query)

            # One `fq` parameter per non-empty filter.
            params += tuple(("fq", fq) for fq in solr_query["fq"].split("|") if fq)

            response = self._root.get("%(collection)s/select" % solr_query, params)

            # isinstance (rather than an exact type comparison) so that dict
            # subclasses are returned as is instead of being fed to json.loads.
            if not isinstance(response, dict):
                # Got 'plain/text' mimetype instead of 'application/json'
                response = json.loads(response)
            return response
        except RestException as e:  # `as` form is valid on Python 2.6+ and Python 3
            raise PopupException("Error while accessing Solr: %s" % e)
Ejemplo n.º 14
0
class SparkHistoryServerApi(object):
  """
  REST client for the ``applications`` endpoints served under
  ``<spark_hs_url>/api/<_API_VERSION>/``.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    spark_hs_url: base URL of the history server; also exposed unchanged as ``ui_url``.
    security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
    ssl_cert_ca_verify: always forwarded to the HTTP client's ``set_verify``.
    """
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    http_client = HttpClient(self._url, logger=LOG)
    self._client = http_client
    self._root = Resource(http_client)
    self._security_enabled = security_enabled

    if security_enabled:
      http_client.set_kerberos_auth()

    http_client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % self._url

  @property
  def url(self):
    """URL of the REST API root."""
    return self._url

  @property
  def ui_url(self):
    """URL given to the constructor, without the API suffix."""
    return self._ui_url

  @property
  def headers(self):
    """Request headers asking for a JSON response; a new dict on every access."""
    return {'Accept': _JSON_CONTENT_TYPE}

  def _get_attempt_resource(self, app_id, attempt_id, leaf):
    """GET ``applications/<app_id>/<attempt_id>/<leaf>``."""
    path = 'applications/%s/%s/%s' % (app_id, attempt_id, leaf)
    return self._root.get(path, headers=self.headers)

  def applications(self):
    """GET ``applications``."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """GET ``applications/<app_id>``."""
    return self._root.get('applications/%s' % (app_id,), headers=self.headers)

  def jobs(self, app_id, attempt_id):
    """GET the jobs of one application attempt."""
    return self._get_attempt_resource(app_id, attempt_id, 'jobs')

  def stages(self, app_id, attempt_id):
    """GET the stages of one application attempt."""
    return self._get_attempt_resource(app_id, attempt_id, 'stages')

  def executors(self, app_id, attempt_id):
    """GET the executors of one application attempt."""
    return self._get_attempt_resource(app_id, attempt_id, 'executors')
Ejemplo n.º 15
0
class ImpalaDaemonApi(object):
  """
  Client for the ``queries`` page of an Impala daemon web server.

  The current user name is kept in thread-local storage, so each thread sees
  only the value it stored through ``set_user``.
  """

  def __init__(self, server_url):
    """
    server_url: base URL of the Impala daemon web UI.

    Digest authentication is configured when both DAEMON_API_USERNAME and
    DAEMON_API_PASSWORD are set.
    """
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    # You can set username/password for Impala Web UI which overrides kerberos
    if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
      self._client.set_digest_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())

    self._root = Resource(self._client)
    self._security_enabled = False
    self._thread_local = threading.local()


  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url


  @property
  def url(self):
    """Base URL every request is issued against."""
    return self._url


  @property
  def security_enabled(self):
    """Always False: the constructor never enables security."""
    return self._security_enabled


  @property
  def user(self):
    """User name stored by ``set_user`` in the current thread.

    Raises AttributeError when ``set_user`` has not been called in this thread.
    """
    return self._thread_local.user


  def set_user(self, user):
    """Store ``user.username`` when that attribute exists, otherwise ``user`` itself."""
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user


  def get_queries(self):
    """
    GET ``queries`` with ``json=true`` and return the decoded payload.

    A string response is parsed as JSON; anything else is returned unchanged.

    Raises ImpalaDaemonApiException when the response is not valid JSON.
    """
    params = {
      'json': 'true'
    }

    resp = self._root.get('queries', params=params)
    try:
      # NOTE(review): `basestring` exists on Python 2 only.
      if isinstance(resp, basestring):
        return json.loads(resp)
      else:
        return resp
    except ValueError as e:  # `as` form is valid on Python 2.6+ and Python 3
      raise ImpalaDaemonApiException('ImpalaDaemonApi did not return valid JSON: %s' % e)
Ejemplo n.º 16
0
 def get_log_list(self):
   """
   List the names of the log files available for this attempt.

   The log page is fetched on behalf of the user returned by
   ``get_log_link()`` (sent as ``doAs``) and every link found in the second
   column of its table is reduced to the second-to-last segment of its path.

   Returns a list of names; an empty list when there is no log link. A link
   whose path has fewer than two segments yields ''.
   """
   log_link, user = self.get_log_link()
   if not log_link:
     return []
   params = {
     'doAs': user
   }
   log_link = re.sub('job_[^/]+', str(self.id), log_link)
   root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
   response = root.get('/', params=params)
   links = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]//a/@href')

   names = []
   for href in links:
     segments = urlparse.urlsplit(href)[2].split('/')
     # The length check is on the path segments: the previous guard tested the
     # 5-field urlsplit() result, so it never protected the [-2] lookup.
     names.append(segments[-2] if len(segments) >= 2 else '')
   return names
Ejemplo n.º 17
0
Archivo: server.py Proyecto: 10sr/hue
class ImpalaDaemonApi(object):
  """
  Client for the ``query_profile`` page of an Impala daemon web server.

  The current user name is kept in thread-local storage, so each thread sees
  only the value it stored through ``set_user``.
  """

  def __init__(self, server_url):
    """server_url: base URL of the Impala daemon web UI."""
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    self._thread_local = threading.local()


  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url


  @property
  def url(self):
    """Base URL every request is issued against."""
    return self._url


  @property
  def security_enabled(self):
    """Always False: the constructor never enables security."""
    return self._security_enabled


  @property
  def user(self):
    """User name stored by ``set_user`` in the current thread.

    Raises AttributeError when ``set_user`` has not been called in this thread.
    """
    return self._thread_local.user


  def set_user(self, user):
    """Store ``user.username`` when that attribute exists, otherwise ``user`` itself."""
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user


  def get_query_profile(self, query_id):
    """
    GET ``query_profile`` for ``query_id`` with ``json=true`` and return the decoded JSON.

    Raises ImpalaDaemonApiException when the response is not valid JSON.
    """
    params = {
      'query_id': query_id,
      'json': 'true'
    }
    resp = self._root.get('query_profile', params=params)
    try:
      return json.loads(resp)
    except ValueError:  # the exception object was bound but never used
      raise ImpalaDaemonApiException('ImpalaDaemonApi query_profile did not return valid JSON.')
Ejemplo n.º 18
0
class SolrApi(object):
  """
  Minimal client for the Solr ``select`` handler that issues requests on behalf of a user.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    """
    solr_url: base URL of the Solr server; request paths are relative to it.
    user: value sent as the ``doAs`` request parameter.

    Kerberos authentication is enabled on the client when ``SECURITY_ENABLED.get()`` is true.
    """
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def _get_params(self):
    """
    Return the impersonation parameters as a tuple of ``(name, value)`` pairs.

    With security enabled only ``doAs`` is sent; otherwise ``user.name`` is
    set to DEFAULT_USER as well.
    """
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def query(self, solr_query, hue_core):
    """
    Run ``solr_query`` against ``<collection>/select`` and return the result as a dict.

    solr_query: mapping with the keys ``q``, ``rows``, ``start``, ``fq`` and
      ``collection``. An empty ``q`` falls back to ``EMPTY_QUERY.get()``;
      ``fq`` is a ``|``-separated list of filter queries, empty entries are skipped.
    hue_core: object whose ``get_query(solr_query)`` returns extra
      ``(name, value)`` pairs appended to the request parameters.

    Raises PopupException when the HTTP layer raises RestException.
    """
    try:
      params = self._get_params() + (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )

      params += hue_core.get_query(solr_query)

      # One `fq` parameter per non-empty filter.
      params += tuple(('fq', fq) for fq in solr_query['fq'].split('|') if fq)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      # isinstance (rather than an exact type comparison) so that dict
      # subclasses are returned as is instead of being fed to json.loads.
      if not isinstance(response, dict):
        # Got 'plain/text' mimetype instead of 'application/json'
        try:
          response = json.loads(response)
        except ValueError as e:
          # Got some null bytes in the response
          LOG.error('%s: %s', e, repr(response))
          response = json.loads(response.replace('\x00', ''))
      return response
    except RestException as e:  # `as` form is valid on Python 2.6+ and Python 3
      raise PopupException(e, title=_('Error while accessing Solr'))
Ejemplo n.º 19
0
  def get_task_log(self, offset=0):
    """
    Fetch the stdout, stderr and syslog of this attempt.

    offset: sent as the ``start`` request parameter; any value whose
      ``int()`` is 0 is sent as the integer 0.

    Returns a list of three strings in the order stdout, stderr, syslog.
    When ``get_log_link()`` yields no link, three empty strings are returned.
    A log that cannot be retrieved is replaced by an error message and the
    failure details are written to the module logger.
    """
    logs = []

    log_link, user = self.get_log_link()
    if not log_link:
      return ['', '', '']

    for name in ('stdout', 'stderr', 'syslog'):
      link = '/%s/' % name
      if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': # Yarn currently dumps with 500 error with doas in running state
        params = {}
      else:
        params = {
          'doAs': user
        }

      if int(offset) != 0:
        params['start'] = offset
      else:
        params['start'] = 0

      response = None
      try:
        log_link = re.sub('job_[^/]+', str(self.id), log_link)
        root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
        response = root.get(link, params=params)
        log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
      except Exception as e:  # best effort: report the failure in place of the log
        # Translate the template first, then interpolate: formatting before
        # the lookup produces a message id no catalog can contain.
        log = _('Failed to retrieve log: %s') % e
        try:
          debug_info = '\nLog Link: %s' % log_link
          if response:
            debug_info += '\nHTML Response: %s' % response
          LOG.error(debug_info)
        except Exception:
          LOG.exception('failed to build debug info')

      logs.append(log)

    # The collected logs used to be dropped because nothing was returned.
    return logs
Ejemplo n.º 20
0
  def get_task_log(self, offset=0):
    """
    Fetch the stdout, stderr and syslog of this attempt.

    The log URL is taken from the last entry of the job's ``jobAttempt``
    list. Unless it starts with the history server URL of one of the
    configured YARN clusters, it is rewritten to point at this attempt's
    container and node host.

    offset: sent as the ``start`` request parameter when ``int(offset) >= 0``.

    Returns a list of three strings in the order stdout, stderr, syslog.
    A log that cannot be retrieved is replaced by an error message and the
    failure details are written to the module logger.
    """
    logs = []
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']
    # Get MR task logs

    # Don't hack up the urls if they've been migrated to the job history server.
    # `.values()` instead of the Python-2-only `.itervalues()`.
    for cluster in YARN_CLUSTERS.get().values():
      if log_link.startswith(cluster.HISTORY_SERVER_API_URL.get()):
        break
    else:
      if self.assignedContainerId:
        log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
      if hasattr(self, 'nodeHttpAddress'):
        # Only the host part (before ':') is swapped.
        log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

    for name in ('stdout', 'stderr', 'syslog'):
      link = '/%s/' % name
      params = {}
      if int(offset) >= 0:
        params['start'] = offset

      # Bound up front so the error handler can always report it, even when
      # the failure happens before the request is sent.
      response = None
      try:
        log_link = re.sub('job_[^/]+', self.id, log_link)
        root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
        response = root.get(link, params=params)
        log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
      except Exception as e:  # best effort: report the failure in place of the log
        # Translate the template first, then interpolate: formatting before
        # the lookup produces a message id no catalog can contain.
        log = _('Failed to retrieve log: %s') % e
        try:
          debug_info = '\nLog Link: %s' % log_link
          debug_info += '\nHTML Response: %s' % response
          # NOTE(review): this block uses both LOGGER and LOG -- confirm both names exist.
          LOGGER.error(debug_info)
        except Exception:
          LOG.exception('failed to build debug info')

      logs.append(log)

    # The collected logs used to be dropped because nothing was returned.
    return logs
Ejemplo n.º 21
0
class FlinkSqlClient():
    """
    Implements https://github.com/ververica/flink-sql-gateway
    Could be a pip module or sqlalchemy dialect in the future.

    All requests are relative to ``<api_url>/<_API_VERSION>/``.
    """

    def __init__(self, user, api_url):
        """
        user: kept on the instance as ``self.user``.
        api_url: base URL of the gateway; ``/<_API_VERSION>/`` is appended to it.
        """
        self.user = user
        self._url = posixpath.join(api_url + '/' + _API_VERSION + '/')
        http_client = HttpClient(self._url, logger=LOG)
        self._client = http_client
        self._root = Resource(http_client)

    def __str__(self):
        return "FlinkClient at %s" % self._url

    def info(self):
        """GET ``info``."""
        return self._root.get('info')

    def create_session(self, **properties):
        """POST a session definition to ``sessions``.

        ``properties`` override or extend the built-in defaults below.
        """
        defaults = {
            "session_name": "test",  # optional
            "planner": "blink",  # required, "old"/"blink"
            "execution_type": "streaming",  # required, "batch"/"streaming"
            "properties": {"key": "value"},  # optional
        }
        payload = dict(defaults, **properties)
        return self._root.post('sessions', data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

    def session_heartbeat(self, session_id):
        """POST to ``sessions/<session_id>/heartbeat``."""
        heartbeat_path = 'sessions/%s/heartbeat' % (session_id,)
        return self._root.post(heartbeat_path)

    def execute_statement(self, session_id, statement):
        """POST ``statement`` to ``sessions/<session_id>/statements``."""
        payload = {
            "statement": statement,  # required
            # execution time limit in milliseconds, optional, but required for stream SELECT ?
            "execution_timeout": "",
        }
        statements_path = 'sessions/%s/statements' % (session_id,)
        return self._root.post(statements_path, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

    def fetch_status(self, session_id, job_id):
        """GET ``sessions/<session_id>/jobs/<job_id>/status``."""
        status_path = 'sessions/%s/jobs/%s/status' % (session_id, job_id)
        return self._root.get(status_path)

    def fetch_results(self, session_id, job_id, token=0):
        """GET ``sessions/<session_id>/jobs/<job_id>/result/<token>``."""
        result_path = 'sessions/%s/jobs/%s/result/%s' % (session_id, job_id, token)
        return self._root.get(result_path)

    def close_statement(self, session_id, job_id):
        """DELETE ``sessions/<session_id>/jobs/<job_id>``."""
        job_path = 'sessions/%s/jobs/%s' % (session_id, job_id)
        return self._root.delete(job_path)

    def close_session(self, session_id):
        """DELETE ``sessions/<session_id>``."""
        session_path = 'sessions/%s' % (session_id,)
        return self._root.delete(session_path)
Ejemplo n.º 22
0
class ResourceManagerApi(object):
    """
    REST client for the ``cluster`` endpoints served under ``<oozie_url>/ws/<_API_VERSION>``.

    Every request goes through ``_execute`` so that the plain-text answer of
    a standby ResourceManager is turned into a YarnFailoverOccurred error.
    """

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """
        oozie_url: base URL of the server; ``ws`` and ``_API_VERSION`` are appended to it.
        security_enabled: when true, Kerberos authentication is switched on for the HTTP client.
        ssl_cert_ca_verify: always forwarded to the HTTP client's ``set_verify``.
        """
        self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "ResourceManagerApi at %s" % (self._url, )

    @property
    def url(self):
        """Full base URL every request is issued against."""
        return self._url

    @property
    def security_enabled(self):
        """The ``security_enabled`` flag given to the constructor."""
        return self._security_enabled

    # Each endpoint used to contain a direct `return self._root...` followed
    # by a second, unreachable `return self._execute(...)`, so the failover
    # check never ran. Only the `_execute` call is kept.

    def cluster(self, **kwargs):
        """GET ``cluster`` as JSON; ``kwargs`` are sent as query parameters."""
        return self._execute(self._root.get,
                             'cluster',
                             params=kwargs,
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def apps(self, **kwargs):
        """GET ``cluster/apps`` as JSON; ``kwargs`` are sent as query parameters."""
        return self._execute(self._root.get,
                             'cluster/apps',
                             params=kwargs,
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def app(self, app_id):
        """GET ``cluster/apps/<app_id>`` as JSON."""
        return self._execute(self._root.get,
                             'cluster/apps/%(app_id)s' % {'app_id': app_id},
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def kill(self, app_id):
        """PUT ``{"state": "KILLED"}`` to ``cluster/apps/<app_id>/state``."""
        return self._execute(self._root.put,
                             'cluster/apps/%(app_id)s/state' %
                             {'app_id': app_id},
                             data=json.dumps({'state': 'KILLED'}),
                             contenttype=_JSON_CONTENT_TYPE)

    def _execute(self, function, *args, **kwargs):
        """
        Call ``function(*args, **kwargs)`` and return its response.

        Raises YarnFailoverOccurred when the response is the plain-text
        "This is standby RM" notice instead of a real answer.
        """
        response = function(*args, **kwargs)

        # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
        # failed back to the master RM.
        if isinstance(response, str) and response.startswith(
                'This is standby RM. Redirecting to the current active RM'):
            raise YarnFailoverOccurred(response)

        return response
Ejemplo n.º 23
0
def job_attempt_logs_json(request,
                          job,
                          attempt_index=0,
                          name='syslog',
                          offset=LOG_OFFSET_BYTES,
                          is_embeddable=False):
    """For async log retrieval as Yarn servers are very slow.

    Resolves the log link of a job attempt (or of the application master
    container), downloads the ``name`` log page and returns its text.

    Args:
        request: current request; ``request.user`` and ``request.jt`` are used.
        job: job object exposing ``jobId`` and ``job_attempts``.
        attempt_index: index into ``job.job_attempts['jobAttempt']``.
        name: log name appended to the log link (e.g. ``'syslog'``).
        offset: sent as the ``start`` query parameter when non-zero.
        is_embeddable: forwarded to ``LinkJobLogs._make_hdfs_links``.

    Returns:
        A ``JsonResponse`` wrapping a dict with ``status`` (0 on success,
        -1 otherwise), ``log`` and, on download failure, ``debug``.

    Raises:
        KeyError: when the application lookup hits a ``KeyError`` or
            ``RestException``.
        Exception: for any other failure while looking up the application.
    """
    log_link = None
    response = {'status': -1}

    try:
        jt = get_api(request.user, request.jt)
        app = jt.get_application(job.jobId)

        if app['applicationType'] == 'MAPREDUCE':
            if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
                attempt_index = int(attempt_index)
                if not job.job_attempts['jobAttempt']:
                    response = {'status': 0, 'log': _('Job has no tasks')}
                else:
                    attempt = job.job_attempts['jobAttempt'][attempt_index]

                    log_link = attempt['logsLink']
                    # Reformat log link to use YARN RM, replace node addr with node ID addr
                    log_link = log_link.replace(attempt['nodeHttpAddress'],
                                                attempt['nodeId'])
            elif app['state'] == 'RUNNING':
                log_link = app['amContainerLogs']
        elif app.get('amContainerLogs'):
            log_link = app.get('amContainerLogs')
    except (KeyError, RestException) as e:
        raise KeyError(
            _("Cannot find job attempt '%(id)s'.") % {'id': job.jobId},
            e) from e
    except Exception as e:
        raise Exception(
            _("Failed to get application for job %s: %s") %
            (job.jobId, e)) from e

    if log_link:
        link = '/%s/' % name
        params = {'doAs': request.user.username}

        if offset != 0:
            params['start'] = offset

        root = Resource(get_log_client(log_link),
                        urllib.parse.urlsplit(log_link)[2],
                        urlencode=False)
        api_resp = None

        try:
            api_resp = root.get(link, params=params)
            log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath(
                '/html/body/table/tbody/tr/td[2]')[0].text_content()

            response['status'] = 0
            response['log'] = LinkJobLogs._make_hdfs_links(log, is_embeddable)
        except Exception as e:
            # Translate the template first, then interpolate: looking up the
            # already-formatted text could never match a catalog entry.
            response['log'] = _('Failed to retrieve log: %s') % e
            try:
                debug_info = '\nLog Link: %s' % log_link
                if api_resp:
                    # Report what the log server actually sent back, not the
                    # JSON payload being assembled here.
                    debug_info += '\nHTML Response: %s' % api_resp
                response['debug'] = debug_info
                LOG.error(debug_info)
            except Exception:
                # Best effort only: debug info must never mask the real error.
                LOG.exception('failed to create debug info')

    return JsonResponse(response)
Ejemplo n.º 24
0
class ManagerApi(object):
    """
    Client for the Cloudera Manager REST API.

    https://cloudera.github.io/cm_api/
    """

    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """Build the HTTP client.

        Kerberos authentication is used when ``security_enabled`` is true,
        basic authentication with the navigator credentials otherwise.
        ``ssl_cert_ca_verify`` is passed to the client's ``set_verify``.
        """
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return True when ``service_name`` is among the cluster's service types.

        Raises ManagerApiException when the service-type request fails.
        """
        cluster = self._get_cluster(cluster_name)
        try:
            services = self._root.get(
                'clusters/%(cluster_name)s/serviceTypes' %
                {'cluster_name': cluster['name']})['items']

            return service_name in services
        except RestException as e:
            raise ManagerApiException(e)

    def get_spark_history_server_configs(self, cluster_name=None):
        """Locate the Spark History Server role and return its configuration.

        Returns a ``(hostId, configs)`` tuple, or ``(None, None)`` when the
        service or role cannot be found or any lookup fails (failures are
        logged as warnings, never raised).
        """
        service_type = "SPARK_ON_YARN"
        shs_role_type = "SPARK_YARN_HISTORY_SERVER"

        try:
            cluster = self._get_cluster(cluster_name)
            display_names = [
                service['displayName']
                for service in self._get_services(cluster['name'])
                if service['type'] == service_type
            ]

            if display_names:
                spark_service_display_name = display_names[0]
                shs_roles = self._get_roles(cluster['name'],
                                            spark_service_display_name,
                                            shs_role_type)

                if shs_roles:
                    # Only the first matching role is considered.
                    shs_server_name = shs_roles[0]['name']
                    shs_server_hostId = shs_roles[0]['hostRef']['hostId']

                    if shs_server_name and shs_server_hostId:
                        shs_server_configs = self._get_role_config(
                            cluster['name'], spark_service_display_name,
                            shs_server_name)
                        return shs_server_hostId, shs_server_configs
        except Exception as e:
            LOG.warning("Check Spark History Server via ManagerApi: %s", e)

        return None, None

    def get_spark_history_server_url(self, cluster_name=None):
        """Return the Spark History Server base URL, or None when unknown.

        Ports and the SSL flag are read from the ``default`` field of the
        matching role configuration entries.
        """
        shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_hostId and shs_server_configs:
            wanted = ('spark.history.ui.port',
                      'spark.ssl.historyServer.port',
                      'spark.ssl.historyServer.enabled')
            found = {}
            for config in shs_server_configs:
                if ('relatedName' in config and 'default' in config
                        and config['relatedName'] in wanted):
                    found[config['relatedName']] = config['default']

            shs_ui_host = self._root.get('hosts/%(hostId)s' %
                                         {'hostId': shs_server_hostId})
            shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None

            return self.assemble_shs_url(
                shs_ui_hostname,
                found.get('spark.history.ui.port'),
                found.get('spark.ssl.historyServer.port'),
                found.get('spark.ssl.historyServer.enabled'))

        return None

    def get_spark_history_server_security_enabled(self, cluster_name=None):
        """Return True when ``history_server_spnego_enabled`` defaults to 'true'.

        Returns False when the configuration entry is absent or holds any
        other value.
        """
        _, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_configs:
            for config in shs_server_configs:
                if ('relatedName' in config and 'default' in config
                        and config['relatedName'] ==
                        'history_server_spnego_enabled'):
                    # Always a real bool, even for an empty/None default.
                    return config['default'] == 'true'

        return False

    def assemble_shs_url(self,
                         shs_ui_hostname,
                         shs_ui_port=None,
                         shs_ssl_port=None,
                         shs_ssl_enabled=None):
        """Build ``<protocol>://<hostname>:<port>`` for the history server.

        ``https`` and the SSL port are used when ``shs_ssl_enabled`` equals
        'true' (case-insensitive); ``http`` and the UI port otherwise.
        Returns None (after logging a warning) if any argument is missing.
        """
        if not all((shs_ui_hostname, shs_ui_port, shs_ssl_port,
                    shs_ssl_enabled)):
            LOG.warning("Spark conf not found!")
            return None

        ssl_on = shs_ssl_enabled.lower() == 'true'
        return '%(protocol)s://%(hostname)s:%(port)s' % {
            'protocol': 'https' if ssl_on else 'http',
            'hostname': shs_ui_hostname,
            'port': shs_ssl_port if ssl_on else shs_ui_port,
        }

    def tools_echo(self):
        """Call ``tools/echo`` with ``message=hello`` and return the response."""
        try:
            params = (('message', 'hello'), )

            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_brokers(self, cluster_name=None):
        """Return the Kafka broker hosts as a ``host:9092,host:9092`` string."""
        try:
            hosts = self._get_hosts('KAFKA',
                                    'KAFKA_BROKER',
                                    cluster_name=cluster_name)

            return ','.join(host['hostname'] + ':9092' for host in hosts)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kudu_master(self, cluster_name=None):
        """Return the hostname of the first KUDU_MASTER role of the KUDU service."""
        try:
            cluster = self._get_cluster(cluster_name)
            service = [
                service for service in self._get_services(cluster['name'])
                if service['type'] == 'KUDU'
            ][0]
            master = self._get_roles(cluster['name'], service['name'],
                                     'KUDU_MASTER')[0]

            master_host = self._root.get('hosts/%(hostId)s' %
                                         master['hostRef'])

            return master_host['hostname']
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_topics(self, broker_host):
        """Fetch ``/api/topics`` from ``http://<broker_host>:24042``."""
        try:
            client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
            root = Resource(client)

            return root.get('/api/topics')
        except RestException as e:
            raise ManagerApiException(e)

    def update_flume_config(self, cluster_name, config_name, config_value):
        """Set one config entry on the first AGENT role config group of FLUME-1.

        The update is submitted through the ``batch`` endpoint.
        """
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        role_config_groups = [
            role['roleConfigGroupRef']['roleConfigGroupName']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]
        # str.replace is used instead of %-formatting, presumably because the
        # template carries literal '%20' escapes.
        url = (
            u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'
            .replace('%(cluster_name)s', urllib_quote(cluster['name']))
            .replace('%(service)s', service)
            .replace('%(roleConfigGroups)s', role_config_groups[0]))
        data = {
            u'items': [{
                u'url': url,
                u'body': {
                    u'items': [{
                        u'name': config_name,
                        u'value': config_value
                    }]
                },
                u'contentType': u'application/json',
                u'method': u'PUT'
            }]
        }

        return self.batch(items=data)

    def get_flume_agents(self, cluster_name=None):
        """Return the hostnames running a FLUME AGENT role."""
        return [
            host['hostname'] for host in self._get_hosts(
                'FLUME', 'AGENT', cluster_name=cluster_name)
        ]

    def _get_hosts(self, service_name, role_name, cluster_name=None):
        """Return host records running ``role_name`` of the first ``service_name`` service."""
        try:
            cluster = self._get_cluster(cluster_name)
            service = [
                service for service in self._get_services(cluster['name'])
                if service['type'] == service_name
            ][0]
            roles = self._get_roles(cluster['name'], service['name'],
                                    role_name)
            hosts_ids = {role['hostRef']['hostId'] for role in roles}

            hosts = self._root.get('hosts')['items']
            return [host for host in hosts if host['hostId'] in hosts_ids]
        except RestException as e:
            raise ManagerApiException(e)

    def refresh_flume(self, cluster_name, restart=False):
        """Refresh (or restart, when ``restart`` is true) the FLUME-1 AGENT roles."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roles = [
            role['name']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]

        if restart:
            return self.restart_services(cluster['name'], service, roles)
        else:
            return self.refresh_configs(cluster['name'], service, roles)

    def refresh_configs(self, cluster_name, service=None, roles=None):
        """Post a ``refresh`` command for the cluster, a service, or given roles."""
        return self._run_command('refresh', cluster_name, service, roles)

    def restart_services(self, cluster_name, service=None, roles=None):
        """Post a ``restart`` command for the cluster, a service, or given roles."""
        return self._run_command('restart', cluster_name, service, roles)

    def _run_command(self, command, cluster_name, service=None, roles=None):
        """Post ``command`` at cluster level or as a service role command.

        Without ``service`` the cluster-wide command endpoint is used. With a
        service, the ``roleCommands`` endpoint is used and, when ``roles`` is
        given, the role names are sent as ``{"items": roles}``.

        Raises ManagerApiException when the request fails.
        """
        if service is None:
            path = 'clusters/%(cluster_name)s/commands/%(command)s' % {
                'cluster_name': cluster_name,
                'command': command
            }
        else:
            path = 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/%(command)s' % {
                'cluster_name': cluster_name,
                'service': service,
                'command': command
            }

        post_kwargs = {'contenttype': "application/json"}
        if service is not None and roles is not None:
            post_kwargs['data'] = json.dumps({"items": roles})

        try:
            return self._root.post(path, **post_kwargs)
        except RestException as e:
            raise ManagerApiException(e)

    def batch(self, items):
        """Post ``items`` (JSON-encoded) to the ``batch`` endpoint."""
        try:
            return self._root.post('batch',
                                   data=json.dumps(items),
                                   contenttype='application/json')
        except RestException as e:
            raise ManagerApiException(e)

    def _get_cluster(self, cluster_name=None):
        """Return the cluster named ``cluster_name``, or the first cluster when None.

        Raises IndexError when no cluster matches.
        """
        clusters = self._root.get('clusters/')['items']

        if cluster_name is None:
            return clusters[0]

        return [
            cluster for cluster in clusters
            if cluster['name'] == cluster_name
        ][0]

    def _get_services(self, cluster_name):
        """Return the service records listed for ``cluster_name``."""
        return self._root.get('clusters/%(cluster_name)s/services' %
                              {'cluster_name': cluster_name})['items']

    def _get_roles(self, cluster_name, service_name, role_type):
        """Return the roles of ``service_name`` whose type is ``role_type``."""
        roles = self._root.get(
            'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
                'cluster_name': cluster_name,
                'service_name': service_name
            })['items']
        return [role for role in roles if role['type'] == role_type]

    def _get_role_config(self, cluster_name, service_name, role_name):
        """Return the full-view configuration entries of one role."""
        return self._root.get(
            'clusters/%(cluster_name)s/services/%(service_name)s/roles/%(role_name)s/config'
            % {
                'cluster_name': cluster_name,
                'service_name': service_name,
                'role_name': role_name
            },
            params={'view': 'full'})['items']

    def get_impalad_config(self,
                           key=None,
                           impalad_host=None,
                           cluster_name=None):
        """Return the value of config ``key`` for the impalad on ``impalad_host``.

        Returns None when ``key`` or ``impalad_host`` is missing, when the
        service, host, role or key cannot be found, or when any lookup fails
        (failures are logged as warnings, never raised).
        """
        if not key or not impalad_host:
            return None

        service_type = "IMPALA"
        role_type = 'IMPALAD'

        try:
            cluster = self._get_cluster(cluster_name)
            display_names = [
                service['displayName']
                for service in self._get_services(cluster['name'])
                if service['type'] == service_type
            ]

            hosts = self._root.get('hosts')['items']
            impalad_host_ids = [
                host['hostId'] for host in hosts
                if host['hostname'] == impalad_host
            ]

            if impalad_host_ids and display_names:
                impalad_host_id = impalad_host_ids[0]
                impala_service_display_name = display_names[0]

                impalad_role_names = [
                    role['name'] for role in self._get_roles(
                        cluster['name'], impala_service_display_name,
                        role_type)
                    if role['hostRef']['hostId'] == impalad_host_id
                ]

                if impalad_role_names and impalad_role_names[0]:
                    server_configs = self._get_role_config(
                        cluster['name'], impala_service_display_name,
                        impalad_role_names[0])

                    for config in server_configs:
                        if 'relatedName' in config and 'value' in config:
                            if config['relatedName'] == key:
                                return config['value']

        except Exception as e:
            LOG.warning(
                "Get Impala Daemon API configurations via ManangerAPI: %s", e)

        return None
Ejemplo n.º 25
0
  def get_task_log(self, offset=0):
    """Fetch the stdout, stderr and syslog text of this task attempt.

    The log link of the job's last attempt is rewritten to point at this
    attempt's container, then each of the three log pages is downloaded.
    A non-zero ``offset`` is sent as the ``start`` query parameter.

    Returns a list of three entries in the order (stdout, stderr, syslog);
    an entry holds an error message when its download or parsing failed.
    """
    logs = []
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']

    # Generate actual task log link from logsLink url
    if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
      logs_path = '/node/containerlogs/'
      node_url, tracking_path = log_link.split(logs_path)
      container_id, user = tracking_path.strip('/').split('/')

      # Replace log path tokens with actual container properties if available
      if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
        node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress)
      container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id

      log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
        'node_url': node_url,
        'logs_path': logs_path.strip('/'),
        'container': container_id,
        'user': user
      }
    else:  # Completed jobs
      logs_path = '/jobhistory/logs/'
      root_url, tracking_path = log_link.split(logs_path)
      node_url, container_id, attempt_id, user = tracking_path.strip('/').split('/')

      # Replace log path tokens with actual attempt properties if available
      if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
        node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1])
      container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id
      attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id

      log_link = '%(root_url)s/%(logs_path)s/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
        'root_url': root_url,
        'logs_path': logs_path.strip('/'),
        'node': node_url,
        'container': container_id,
        'attempt': attempt_id,
        'user': user
      }

    for name in ('stdout', 'stderr', 'syslog'):
      link = '/%s/' % name
      params = {}
      if int(offset) != 0:
        params['start'] = offset

      response = None
      try:
        log_link = re.sub('job_[^/]+', self.id, log_link)
        root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
        response = root.get(link, params=params)
        log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
      except Exception as e:
        # Translate the template first, then interpolate the error text.
        log = _('Failed to retrieve log: %s') % e
        try:
          debug_info = '\nLog Link: %s' % log_link
          if response:
            debug_info += '\nHTML Response: %s' % response
          LOG.error(debug_info)
        except Exception:
          # Best effort only: debug info must never mask the real error.
          LOG.exception('failed to build debug info')

      logs.append(log)

    # One entry per log name; without this the collected logs were discarded.
    return logs
Ejemplo n.º 26
0
class HistoryServerApi(object):
    """REST client for the ``ws/<version>/history`` endpoints of a server.

    The acting user is stored per thread (see ``setuser``) and is added to
    every request as impersonation parameters when it differs from the
    default user.
    """

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """Create the HTTP client rooted at ``<oozie_url>/ws/<version>/history``."""
        self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
        http_client = HttpClient(self._url, logger=LOG)
        self._client = http_client
        self._root = Resource(http_client)
        self._security_enabled = security_enabled
        self._thread_local = threading.local()  # To store user info

        if self._security_enabled:
            http_client.set_kerberos_auth()

        http_client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "HistoryServerApi at %s" % (self._url, )

    def _get_params(self):
        """Return the impersonation query parameters for the current user."""
        params = {}
        acting_user = self.username

        if acting_user == DEFAULT_USER.get():
            return params

        # We impersonate if needed
        params['doAs'] = acting_user
        if not self._security_enabled:
            params['user.name'] = DEFAULT_USER.get()

        return params

    def _get_json(self, path):
        """GET ``path`` with the impersonation params and a JSON Accept header."""
        return self._root.get(path,
                              params=self._get_params(),
                              headers={'Accept': _JSON_CONTENT_TYPE})

    @property
    def url(self):
        """Base URL this client talks to."""
        return self._url

    @property
    def user(self):
        """Alias of ``username``, kept for backward compatibility."""
        return self.username

    @property
    def username(self):
        """User set for the current thread, or the default user when unset."""
        try:
            current = self._thread_local.user
        except AttributeError:
            current = DEFAULT_USER.get()
        return current

    def setuser(self, user):
        """Set the current thread's user and return the previous one."""
        previous = self.user
        self._thread_local.user = user
        return previous

    def job(self, user, job_id):
        """Fetch one job; ``user`` is accepted but not used here."""
        path = 'mapreduce/jobs/%(job_id)s' % {'job_id': job_id}
        return self._get_json(path)

    def counters(self, job_id):
        """Fetch the counters of a job."""
        path = 'mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}
        return self._get_json(path)

    def conf(self, job_id):
        """Fetch the configuration of a job."""
        path = 'mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}
        return self._get_json(path)

    def job_attempts(self, job_id):
        """Fetch the attempts of a job."""
        path = 'mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}
        return self._get_json(path)

    def tasks(self, job_id):
        """Fetch the tasks of a job."""
        path = 'mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}
        return self._get_json(path)

    def task(self, job_id, task_id):
        """Fetch one task of a job."""
        ids = {'job_id': job_id, 'task_id': task_id}
        return self._get_json(
            'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % ids)

    def task_attempts(self, job_id, task_id):
        """Fetch the attempts of a task."""
        ids = {'job_id': job_id, 'task_id': task_id}
        return self._get_json(
            'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % ids)

    def task_counters(self, job_id, task_id):
        """Fetch the counters of a task.

        Any 'application' substring in ``job_id`` is rewritten to 'job'
        before the request is built.
        """
        ids = {
            'job_id': job_id.replace('application', 'job'),
            'task_id': task_id
        }
        return self._get_json(
            'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % ids)

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch one attempt of a task."""
        ids = {
            'job_id': job_id,
            'task_id': task_id,
            'attempt_id': attempt_id
        }
        return self._get_json(
            'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s'
            % ids)

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """Fetch the counters of one task attempt."""
        ids = {
            'job_id': job_id,
            'task_id': task_id,
            'attempt_id': attempt_id
        }
        return self._get_json(
            'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters'
            % ids)
Ejemplo n.º 27
0
class SparkHistoryServerApi(object):
    """REST client for the ``api/<version>/`` endpoints of a Spark History Server."""

    def __init__(self,
                 spark_hs_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """Create the HTTP client rooted at ``<spark_hs_url>/api/<version>/``."""
        self._ui_url = spark_hs_url
        self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
        http_client = HttpClient(self._url, logger=LOG)
        self._client = http_client
        self._root = Resource(http_client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            http_client.set_kerberos_auth()

        http_client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "Spark History Server API at %s" % (self._url, )

    def _get(self, path):
        """GET ``path`` with this client's default headers."""
        return self._root.get(path, headers=self.headers)

    @property
    def url(self):
        """Base URL of the REST API."""
        return self._url

    @property
    def ui_url(self):
        """URL the client was constructed with."""
        return self._ui_url

    @property
    def headers(self):
        """Headers sent with every request (JSON Accept header)."""
        return {'Accept': _JSON_CONTENT_TYPE}

    def applications(self):
        """List all applications."""
        return self._get('applications')

    def application(self, app_id):
        """Fetch one application."""
        path = 'applications/%(app_id)s' % {'app_id': app_id}
        return self._get(path)

    def jobs(self, app_id):
        """List the jobs of an application."""
        path = 'applications/%(app_id)s/jobs' % {'app_id': app_id}
        return self._get(path)

    def stages(self, app_id):
        """List the stages of an application."""
        path = 'applications/%(app_id)s/stages' % {'app_id': app_id}
        return self._get(path)

    def executors(self, app_id):
        """List the executors of an application."""
        path = 'applications/%(app_id)s/executors' % {'app_id': app_id}
        return self._get(path)

    def stage_attempts(self, app_id, stage_id):
        """List the attempts of a stage."""
        ids = {'app_id': app_id, 'stage_id': stage_id}
        return self._get('applications/%(app_id)s/stages/%(stage_id)s' % ids)

    def stage_attempt(self, app_id, stage_id, stage_attempt_id):
        """Fetch one attempt of a stage."""
        ids = {
            'app_id': app_id,
            'stage_id': stage_id,
            'stage_attempt_id': stage_attempt_id
        }
        return self._get(
            'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s'
            % ids)

    def task_summary(self, app_id, stage_id, stage_attempt_id):
        """Fetch the task summary of a stage attempt."""
        ids = {
            'app_id': app_id,
            'stage_id': stage_id,
            'stage_attempt_id': stage_attempt_id
        }
        return self._get(
            'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary'
            % ids)

    def task_list(self, app_id, stage_id, stage_attempt_id):
        """Fetch the task list of a stage attempt."""
        ids = {
            'app_id': app_id,
            'stage_id': stage_id,
            'stage_attempt_id': stage_attempt_id
        }
        return self._get(
            'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList'
            % ids)

    def storages(self, app_id):
        """List the stored RDDs of an application."""
        path = 'applications/%(app_id)s/storage/rdd' % {'app_id': app_id}
        return self._get(path)

    def storage(self, app_id, rdd_id):
        """Fetch one stored RDD of an application."""
        ids = {'app_id': app_id, 'rdd_id': rdd_id}
        return self._get(
            'applications/%(app_id)s/storage/rdd/%(rdd_id)s' % ids)

    def download_logs(self, app_id):
        """Fetch the logs of an application."""
        path = 'applications/%(app_id)s/logs' % {'app_id': app_id}
        return self._get(path)

    def download_attempt_logs(self, app_id, attempt_id):
        """Fetch the logs of one application attempt."""
        ids = {'app_id': app_id, 'attempt_id': attempt_id}
        return self._get('applications/%(app_id)s/%(attempt_id)s/logs' % ids)
Ejemplo n.º 28
0
class MapreduceApi(object):
    """Client for the MapReduce job REST resources under ``<mr_url>/proxy``.

    Every job-scoped request is addressed as
    ``<application id>/ws/<_API_VERSION>/mapreduce/jobs/<job id>[/...]``.
    The acting user is kept in thread-local storage (see ``setuser``).
    """

    def __init__(self,
                 mr_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """Build the HTTP client rooted at ``<mr_url>/proxy``.

        :param mr_url: base URL the ``proxy`` path segment is appended to.
        :param security_enabled: when true, enable Kerberos auth on the client.
        :param ssl_cert_ca_verify: value handed to the client's ``set_verify``.
        """
        self._url = posixpath.join(mr_url, 'proxy')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        self._thread_local = threading.local()  # To store user info

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "MapreduceApi at %s" % (self._url, )

    def _get_params(self):
        """Return the impersonation query parameters for the current user.

        Empty when the current user is the default user; otherwise ``doAs``
        is set, plus ``user.name`` when security is disabled.
        """
        params = {}

        if self.username != DEFAULT_USER.get():  # We impersonate if needed
            params['doAs'] = self.username
            if not self._security_enabled:
                params['user.name'] = DEFAULT_USER.get()

        return params

    def _get_job_resource(self, job_id, suffix=''):
        """GET ``<app>/ws/<version>/mapreduce/jobs/<job><suffix>`` as JSON.

        Both identifiers are derived from ``job_id`` so that either a
        ``job_...`` or an ``application_...`` id yields the same path. The
        original code did this for only some of the accessors.

        :param job_id: job (or application) identifier.
        :param suffix: path appended after the job id, starting with ``/``.
        :returns: whatever ``self._root.get`` returns.
        """
        path = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
            'app_id': job_id.replace('job', 'application'),
            'job_id': job_id.replace('application', 'job'),
            'version': _API_VERSION
        }
        return self._root.get(path + suffix,
                              params=self._get_params(),
                              headers={'Accept': _JSON_CONTENT_TYPE})

    @property
    def url(self):
        return self._url

    @property
    def username(self):
        """User set for this thread, or the default user when none was set."""
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER.get()

    def setuser(self, user):
        """Set the user for the current thread and return the previous one."""
        curr = self.username
        self._thread_local.user = user
        return curr

    def job(self, user, job_id):
        """Fetch a job. ``user`` is accepted but not used by this method."""
        return self._get_job_resource(job_id)

    def counters(self, job_id):
        """Fetch a job's counters, or ``None`` when a string comes back."""
        response = self._get_job_resource(job_id, '/counters')
        # If it hits the job history server, it will return HTML.
        # Simply return None in this case because there isn't much data there.
        if isinstance(response, basestring):
            return None
        return response

    def tasks(self, job_id):
        """Fetch the tasks of a job."""
        return self._get_job_resource(job_id, '/tasks')

    def job_attempts(self, job_id):
        """Fetch the attempts of a job."""
        return self._get_job_resource(job_id, '/jobattempts')

    def conf(self, job_id):
        """Fetch the configuration of a job."""
        return self._get_job_resource(job_id, '/conf')

    def task(self, job_id, task_id):
        """Fetch one task of a job."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s' % {'task_id': task_id})

    def task_counters(self, job_id, task_id):
        """Fetch the counters of one task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/counters' % {'task_id': task_id})

    def task_attempts(self, job_id, task_id):
        """Fetch the attempts of one task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/attempts' % {'task_id': task_id})

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch one attempt of a task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
                'task_id': task_id,
                'attempt_id': attempt_id
            })

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """Fetch the counters of one task attempt."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {
                'task_id': task_id,
                'attempt_id': attempt_id
            })

    def kill(self, job_id):
        """Kill the application behind ``job_id`` via the resource manager."""
        app_id = job_id.replace('job', 'application')
        get_resource_manager(self.username).kill(
            app_id)  # We need to call the RM
Ejemplo n.º 29
0
Archivo: views.py Proyecto: ronwxy/hue
  try:
    attempt_index = int(attempt_index)
    attempt = job.job_attempts['jobAttempt'][attempt_index]
    log_link = attempt['logsLink']
  except (KeyError, RestException), e:
    raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)

  link = '/%s/' % name
  params = {}
  if offset and int(offset) >= 0:
    params['start'] = offset

  root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
  debug_info = ''
  try:
    response = root.get(link, params=params)
    log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
  except Exception, e:
    log = _('Failed to retrieve log: %s' % e)
    try:
      debug_info = '\nLog Link: %s' % log_link
      debug_info += '\nHTML Response: %s' % response
      LOGGER.error(debug_info)
    except:
      LOGGER.exception('failed to create debug info')

  response = {'log': LinkJobLogs._make_hdfs_links(log), 'debug': debug_info}

  return JsonResponse(response)

Ejemplo n.º 30
0
        raise Exception(
            _("Failed to get application for job %s: %s") % (job.jobId, e))

    if log_link:
        link = '/%s/' % name
        params = {}
        if offset != 0:
            params['start'] = offset

        root = Resource(get_log_client(log_link),
                        urlparse.urlsplit(log_link)[2],
                        urlencode=False)
        # Raw HTML returned by the log endpoint; stays None if the GET fails.
        api_resp = None

        try:
            api_resp = root.get(link, params=params)
            log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath(
                '/html/body/table/tbody/tr/td[2]')[0].text_content()

            response['status'] = 0
            response['log'] = LinkJobLogs._make_hdfs_links(log)
        except Exception as e:
            # Translate the template first, then interpolate: formatting
            # inside _() would look up the already-formatted text.
            response['log'] = _('Failed to retrieve log: %s') % e
            try:
                debug_info = '\nLog Link: %s' % log_link
                if api_resp:
                    # Report the HTML that failed to parse, not the result
                    # dict being assembled in `response`.
                    debug_info += '\nHTML Response: %s' % api_resp
                response['debug'] = debug_info
                LOG.error(debug_info)
            except Exception:
                LOG.exception('failed to create debug info')
Ejemplo n.º 31
0
class LivyClient(object):
    """REST client for the ``sessions`` and ``batches`` resources at ``livy_url``.

    The acting user is kept in thread-local storage and must be set with
    ``setuser`` before ``user`` is read; the property has no fallback.
    """

    def __init__(self, livy_url):
        """Create the HTTP client and apply the configured security options.

        Kerberos auth, the ``X-Requested-By`` header and certificate
        verification are driven by ``SECURITY_ENABLED``, ``CSRF_ENABLED`` and
        ``SSL_CERT_CA_VERIFY`` respectively.
        """
        self._url = posixpath.join(livy_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = SECURITY_ENABLED.get()
        self._csrf_enabled = CSRF_ENABLED.get()
        self._thread_local = threading.local()

        if self.security_enabled:
            self._client.set_kerberos_auth()

        if self.csrf_enabled:
            self._client.set_headers({'X-Requested-By': 'hue'})

        self._client.set_verify(SSL_CERT_CA_VERIFY.get())

    def __str__(self):
        return "LivyClient at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def csrf_enabled(self):
        return self._csrf_enabled

    @property
    def user(self):
        """User set for the current thread (AttributeError if never set)."""
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` for this thread, as ``user.username`` if present."""
        if hasattr(user, 'username'):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def _fetch_log(self, path, startFrom, size):
        """GET ``path`` and join the lines under its ``log`` key with newlines.

        ``from`` / ``size`` are sent only when the matching argument is not
        ``None``.
        """
        params = {}
        if startFrom is not None:
            params['from'] = startFrom
        if size is not None:
            params['size'] = size

        response = self._root.get(path, params=params)
        return '\n'.join(response['log'])

    def get_status(self):
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the log of session ``uuid`` as one newline-joined string."""
        return self._fetch_log('sessions/%s/log' % uuid, startFrom, size)

    def create_session(self, **properties):
        """POST a new session, adding the current user as ``proxyUser``."""
        properties['proxyUser'] = self.user
        return self._root.post('sessions',
                               data=json.dumps(properties),
                               contenttype=_JSON_CONTENT_TYPE)

    def get_sessions(self):
        return self._root.get('sessions')

    def get_session(self, uuid):
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        """POST ``statement`` as the ``code`` of a new statement in ``uuid``."""
        data = {'code': statement}
        return self._root.post('sessions/%s/statements' % uuid,
                               data=json.dumps(data),
                               contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``statement`` as ``code`` to the session's ``inspect`` resource."""
        data = {'code': statement}
        return self._root.post('sessions/%s/inspect' % uuid,
                               data=json.dumps(data),
                               contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        return self._root.get('sessions/%s/statements/%s' %
                              (session, statement))

    def cancel(self, session):
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        return self._root.get('batches')

    def submit_batch(self, properties):
        """POST a new batch, adding the current user as ``proxyUser``.

        ``properties`` is copied first so the caller's dict is not modified.
        """
        payload = dict(properties)
        payload['proxyUser'] = self.user
        return self._root.post('batches',
                               data=json.dumps(payload),
                               contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return the ``state`` value of batch ``uuid``."""
        response = self._root.get('batches/%s/state' % uuid)
        return response['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the log of batch ``uuid`` as one newline-joined string."""
        return self._fetch_log('batches/%s/log' % uuid, startFrom, size)

    def close_batch(self, uuid):
        return self._root.delete('batches/%s' % uuid)
Ejemplo n.º 32
0
class MapreduceApi(object):
  """
  Client for the MapReduce job REST resources under ``<mr_url>/proxy``.

  Every job-scoped request is addressed as
  ``<application id>/ws/<_API_VERSION>/mapreduce/jobs/<job id>[/...]``.
  The acting user is kept in thread-local storage (see ``setuser``).
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    Build the HTTP client rooted at ``<mr_url>/proxy``.

    Kerberos auth is enabled when ``security_enabled`` is true;
    ``ssl_cert_ca_verify`` is handed to the client's ``set_verify``.
    """
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    """
    Return the impersonation query parameters for the current user.

    Empty when the current user is the default user; otherwise ``doAs`` is
    set, plus ``user.name`` when security is disabled.
    """
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def _get_job_resource(self, job_id, suffix=''):
    """
    GET ``<app>/ws/<version>/mapreduce/jobs/<job><suffix>`` as JSON.

    Both identifiers are derived from ``job_id`` so that either a ``job_...``
    or an ``application_...`` id yields the same path. The original code did
    this for only some of the accessors.
    """
    path = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
      'app_id': job_id.replace('job', 'application'),
      'job_id': job_id.replace('application', 'job'),
      'version': _API_VERSION
    }
    return self._root.get(path + suffix, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    """User set for this thread, or the default user when none was set."""
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the user for the current thread and return the previous one."""
    curr = self.username
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    """Fetch a job. ``user`` is accepted but not used by this method."""
    return self._get_job_resource(job_id)

  def counters(self, job_id):
    """Fetch a job's counters, or ``None`` when a string comes back."""
    response = self._get_job_resource(job_id, '/counters')
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    return response

  def tasks(self, job_id):
    """Fetch the tasks of a job."""
    return self._get_job_resource(job_id, '/tasks')

  def job_attempts(self, job_id):
    """Fetch the attempts of a job."""
    return self._get_job_resource(job_id, '/jobattempts')

  def conf(self, job_id):
    """Fetch the configuration of a job."""
    return self._get_job_resource(job_id, '/conf')

  def task(self, job_id, task_id):
    """Fetch one task of a job."""
    return self._get_job_resource(job_id, '/tasks/%(task_id)s' % {'task_id': task_id})

  def task_counters(self, job_id, task_id):
    """Fetch the counters of one task."""
    return self._get_job_resource(job_id, '/tasks/%(task_id)s/counters' % {'task_id': task_id})

  def task_attempts(self, job_id, task_id):
    """Fetch the attempts of one task."""
    return self._get_job_resource(job_id, '/tasks/%(task_id)s/attempts' % {'task_id': task_id})

  def task_attempt(self, job_id, task_id, attempt_id):
    """Fetch one attempt of a task."""
    return self._get_job_resource(job_id, '/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'task_id': task_id, 'attempt_id': attempt_id})

  def kill(self, job_id):
    """Kill the application behind ``job_id`` via the resource manager."""
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id) # We need to call the RM
Ejemplo n.º 33
0
class MapreduceApi(object):
    """Client for the MapReduce job REST resources under ``<url>/proxy``.

    Every request is addressed as
    ``<application id>/ws/<_API_VERSION>/mapreduce/jobs/<job id>[/...]`` and
    asks for JSON through the ``Accept`` header.
    """

    def __init__(self, oozie_url, security_enabled=False):
        """Build the HTTP client rooted at ``<oozie_url>/proxy``.

        :param oozie_url: base URL the ``proxy`` path segment is appended to.
        :param security_enabled: when true, enable Kerberos auth on the client.
        """
        self._url = posixpath.join(oozie_url, 'proxy')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

    def __str__(self):
        return "MapreduceApi at %s" % (self._url, )

    def _get_job_resource(self, job_id, suffix=''):
        """GET ``<app>/ws/<version>/mapreduce/jobs/<job><suffix>`` as JSON.

        Both identifiers are derived from ``job_id`` so that either a
        ``job_...`` or an ``application_...`` id yields the same path. The
        original code did this for only some of the accessors.

        :param job_id: job (or application) identifier.
        :param suffix: path appended after the job id, starting with ``/``.
        :returns: whatever ``self._root.get`` returns.
        """
        path = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
            'app_id': job_id.replace('job', 'application'),
            'job_id': job_id.replace('application', 'job'),
            'version': _API_VERSION
        }
        return self._root.get(path + suffix,
                              headers={'Accept': _JSON_CONTENT_TYPE})

    @property
    def url(self):
        return self._url

    def job(self, user, job_id):
        """Fetch a job. ``user`` is accepted but not used by this method."""
        return self._get_job_resource(job_id)

    def counters(self, job_id):
        """Fetch a job's counters, or ``None`` when a string comes back."""
        response = self._get_job_resource(job_id, '/counters')
        # If it hits the job history server, it will return HTML.
        # Simply return None in this case because there isn't much data there.
        if isinstance(response, basestring):
            return None
        return response

    def tasks(self, job_id):
        """Fetch the tasks of a job."""
        return self._get_job_resource(job_id, '/tasks')

    def job_attempts(self, job_id):
        """Fetch the attempts of a job."""
        return self._get_job_resource(job_id, '/jobattempts')

    def conf(self, job_id):
        """Fetch the configuration of a job."""
        return self._get_job_resource(job_id, '/conf')

    def task(self, job_id, task_id):
        """Fetch one task of a job."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s' % {'task_id': task_id})

    def task_counters(self, job_id, task_id):
        """Fetch the counters of one task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/counters' % {'task_id': task_id})

    def task_attempts(self, job_id, task_id):
        """Fetch the attempts of one task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/attempts' % {'task_id': task_id})

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch one attempt of a task."""
        return self._get_job_resource(
            job_id, '/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
                'task_id': task_id,
                'attempt_id': attempt_id
            })
Ejemplo n.º 34
0
class OozieApi(object):
  """
  Client for the Oozie REST API rooted at ``<oozie_url>/<API_VERSION>``.

  The acting user is kept in thread-local storage (see ``setuser``) and is
  sent as the ``doAs`` parameter of every request.
  """

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status')

  def __init__(self, oozie_url, security_enabled=False):
    """
    Build the HTTP client; Kerberos auth is enabled when ``security_enabled``.
    """
    self._url = posixpath.join(oozie_url, API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    # To store user info
    self._thread_local = threading.local()

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    """User set for this thread, or DEFAULT_USER when none was set."""
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER

  def setuser(self, user):
    """Return the previous user"""
    prev = self.user
    self._thread_local.user = user
    return prev

  def _get_params(self):
    """
    Return a fresh dict of the query parameters common to every request.

    ``doAs`` and ``timezone`` are always present; ``user.name`` is added only
    when security is disabled.
    """
    params = {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    if not self.security_enabled:
      params['user.name'] = DEFAULT_USER
    return params

  def _get_oozie_properties(self, properties=None):
    """
    Return ``properties`` layered over the default ``user.name`` entry.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf' or 'coord'; any other value is wrapped as a bundle list.
    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_JOB_FILTERS: name, user, group, status
    Raise ValueError on any other filter name.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in kwargs.items():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('wf', offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('coord', offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('bundle', offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    """
    get_coordinator(jobid) -> Coordinator (requested with ``len=-1``)
    """
    params = self._get_params()
    params['len'] = -1
    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    """
    get_bundle(jobid) -> Bundle
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """
    Fetch an action; the wrapper class is picked from the id ('C@' -> coordinator
    action, 'B@' -> bundle action, anything else -> workflow action).
    """
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    Raise ValueError when ``action`` is not a known job action.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params,  data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None) -> jobid

    Raise RestException on error.
    """
    properties = self._get_oozie_properties(properties)

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    """
    Rerun ``jobid`` with ``properties`` layered over the default ``user.name``.

    ``params`` entries override the common request parameters; ``action`` is
    always forced to 'rerun'. The caller's ``params`` dict is not modified.
    """
    properties = self._get_oozie_properties(properties)

    # Start from the common parameters so doAs/timezone are always sent, then
    # let the caller override. The previous code updated a throwaway dict and
    # sent the caller's params without the common ones.
    request_params = self._get_params()
    if params is not None:
      request_params.update(params)

    request_params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, request_params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    """
    get_instrumentation() -> response of the ``admin/instrumentation`` resource
    """
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp
Ejemplo n.º 35
0
class JobServerApi(object):
    """REST client exposing the ``sessions`` and ``batches`` resources of a job server.

    Security is always disabled for this client. The acting user is kept in
    thread-local storage and must be set with ``setuser`` before ``user`` is
    read; the property has no fallback.
    """

    def __init__(self, oozie_url):
        """Create the HTTP client and root resource for ``oozie_url``."""
        self._thread_local = threading.local()
        self._security_enabled = False
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        """User set for the current thread (AttributeError if never set)."""
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` for this thread, as ``user.username`` if present."""
        name = user.username if hasattr(user, 'username') else user
        self._thread_local.user = name

    def get_status(self):
        """GET the ``sessions`` collection."""
        return self._root.get('sessions')

    def create_session(self, **kwargs):
        """POST ``kwargs`` as a JSON document to ``sessions``."""
        body = json.dumps(kwargs)
        return self._root.post('sessions',
                               data=body,
                               contenttype='application/json')

    def get_session(self, uuid):
        """GET one session."""
        path = 'sessions/%s' % uuid
        return self._root.get(path)

    def submit_statement(self, uuid, statement):
        """POST ``statement`` as the ``code`` of a new statement in ``uuid``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid,
                               data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``statement`` as ``code`` to the session's ``inspect`` resource."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid,
                               data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """GET one statement of a session."""
        path = 'sessions/%s/statements/%s' % (session, statement)
        return self._root.get(path)

    def cancel(self, session):
        """POST to the session's ``interrupt`` resource."""
        path = 'sessions/%s/interrupt' % session
        return self._root.post(path)

    def get_batches(self):
        """GET the ``batches`` collection."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """POST ``properties`` as a JSON document to ``batches``."""
        body = json.dumps(properties)
        return self._root.post('batches',
                               data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """GET one batch."""
        path = 'batches/%s' % uuid
        return self._root.get(path)

    def delete_batch(self, uuid):
        """DELETE one batch."""
        path = 'batches/%s' % uuid
        return self._root.delete(path)
Ejemplo n.º 36
0
class OozieApi(object):
    """REST client for an Oozie server.

    Every request goes through a ``Resource`` rooted at
    ``<oozie_url>/<api_version>``. The user that requests are issued on behalf
    of is kept in thread-local storage: call :meth:`setuser` on the current
    thread before any method that talks to the server, otherwise reading
    :attr:`user` raises ``AttributeError``.
    """

    # Keyword names accepted as filters by get_jobs().
    VALID_JOB_FILTERS = ("name", "user", "group", "status")

    def __init__(self, oozie_url, security_enabled=False, api_version=API_VERSION):
        """Build the HTTP client; Kerberos auth is enabled when ``security_enabled``."""
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        self._thread_local = threading.local()
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL (server URL joined with the API version)."""
        return self._url

    @property
    def security_enabled(self):
        """Whether the client was created with ``security_enabled`` set."""
        return self._security_enabled

    @property
    def user(self):
        """User name stored for the current thread by :meth:`setuser`."""
        return self._thread_local.user

    def setuser(self, user):
        """Store the acting user for the current thread.

        Accepts either an object with a ``username`` attribute or a plain value.
        """
        if hasattr(user, "username"):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def _get_params(self):
        """Return a fresh dict of the query parameters sent with every request.

        With security enabled only ``doAs`` and ``timezone`` are sent;
        otherwise ``user.name`` is additionally set to ``DEFAULT_USER``.
        """
        if self.security_enabled:
            return {"doAs": self.user, "timezone": TIME_ZONE.get()}
        return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

    def _get_oozie_properties(self, properties=None):
        """Return job properties: ``user.name`` default overlaid with ``properties``."""
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        return defaults

    def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
        """
        Get a list of Oozie jobs.

        jobtype is 'wf' or 'coord'; any other value yields a BundleList.
        Note that offset is 1-based.
        kwargs is used for filtering and may only contain names listed in
        VALID_JOB_FILTERS: name, user, group, status.

        Raises ValueError for any other filter name.
        """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)
        params["jobtype"] = jobtype

        filter_list = []
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for key, val in kwargs.items():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        if jobtype == "wf":
            wf_list = WorkflowList(self, resp, filters=kwargs)
        elif jobtype == "coord":
            wf_list = CoordinatorList(self, resp, filters=kwargs)
        else:
            wf_list = BundleList(self, resp, filters=kwargs)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("wf", ...)``."""
        return self.get_jobs("wf", offset, cnt, **kwargs)

    def get_coordinators(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("coord", ...)``."""
        return self.get_jobs("coord", offset, cnt, **kwargs)

    def get_bundles(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("bundle", ...)``."""
        return self.get_jobs("bundle", offset, cnt, **kwargs)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid):
        """get_coordinator(jobid) -> Coordinator (requested with ``len=-1``)."""
        params = self._get_params()
        params.update({"len": -1})
        resp = self._root.get("job/%s" % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        """get_bundle(jobid) -> Bundle"""
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params["show"] = "definition"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_job_log(self, jobid):
        """
        get_job_log(jobid) -> Log (xml string)
        """
        params = self._get_params()
        params["show"] = "log"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_action(self, action_id):
        """Fetch one action and wrap it according to the marker in its id.

        Ids containing ``C@`` give a CoordinatorAction, ids containing ``B@``
        a BundleAction, anything else a WorkflowAction.
        """
        if "C@" in action_id:
            Klass = CoordinatorAction
        elif "B@" in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get("job/%s" % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> None
        Raise RestException on error.

        ``parameters``, when given, is merged into the request's query
        parameters. Raises ValueError if ``action`` is not a supported action.
        """
        if action not in ("start", "suspend", "resume", "kill", "rerun", "coord-rerun", "bundle-rerun"):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params["action"] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        Raise RestException on error.
        """
        defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None) -> jobid

        Raise RestException on error.
        """
        properties = self._get_oozie_properties(properties)

        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def rerun(self, jobid, properties=None, params=None):
        """Re-run ``jobid`` with the given job properties.

        ``params``, when given, is layered on top of the default request
        parameters; the caller's dict is left unmodified.
        """
        properties = self._get_oozie_properties(properties)

        # Always start from the defaults (doAs, timezone, ...). The previous
        # code updated a throw-away dict, so caller-supplied params were sent
        # without the defaults and were mutated in place.
        request_params = self._get_params()
        if params is not None:
            request_params.update(params)
        request_params["action"] = "rerun"

        return self._root.put("job/%s" % jobid, request_params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/build-version", params)
        return resp

    def get_instrumentation(self):
        """get_instrumentation() -> response of ``admin/instrumentation``."""
        params = self._get_params()
        resp = self._root.get("admin/instrumentation", params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/configuration", params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/status", params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
        Return the ``slaSummaryList`` entry of the ``sla`` resource.

        kwargs are joined into the ``filter`` parameter, e.g.:
          app_name=my-sla-app
          id=0000002-131206135002457-oozie-oozi-W
          nominal_start=2013-06-18T00:01Z
          nominal_end=2013-06-23T00:01Z
        """
        params = self._get_params()
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.items()])
        resp = self._root.get("sla", params)
        return resp["slaSummaryList"]
Ejemplo n.º 37
0
class JobServerApi(object):
  """REST client for the sessions and batches resources of a job server.

  The acting user is kept in thread-local storage (see ``setuser``) and is
  sent as ``proxyUser`` when creating sessions or submitting batches.
  """

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    # Header added to every request when CSRF protection is configured.
    if self.csrf_enabled:
      self._client.set_headers({'X-Requested-By' : 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    """URL the client was created with."""
    return self._url

  @property
  def security_enabled(self):
    """Value read from ``SECURITY_ENABLED`` at construction time."""
    return self._security_enabled

  @property
  def csrf_enabled(self):
    """Value read from ``CSRF_ENABLED`` at construction time."""
    return self._csrf_enabled

  @property
  def user(self):
    """User stored for the current thread by ``setuser``."""
    return self._thread_local.user

  def setuser(self, user):
    """Store ``user.username`` when available, otherwise ``user`` itself."""
    self._thread_local.user = user.username if hasattr(user, 'username') else user

  def get_status(self):
    """GET the ``sessions`` resource."""
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the log lines of session ``uuid`` joined with newlines.

    ``startFrom`` and ``size`` are sent as ``from`` / ``size`` only when not None.
    """
    window = {}
    for name, value in (('from', startFrom), ('size', size)):
      if value is not None:
        window[name] = value

    reply = self._root.get('sessions/%s/log' % uuid, params=window)
    return '\n'.join(reply['log'])

  def create_session(self, **properties):
    """POST a new session; ``proxyUser`` is always set to the current user."""
    properties['proxyUser'] = self.user
    body = json.dumps(properties)
    return self._root.post('sessions', data=body, contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    """GET the ``sessions`` resource."""
    return self._root.get('sessions')

  def get_session(self, uuid):
    """GET ``sessions/<uuid>``."""
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    """GET ``sessions/<uuid>/statements``."""
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    """POST ``{"code": statement}`` to ``sessions/<uuid>/statements``."""
    body = json.dumps({'code': statement})
    return self._root.post('sessions/%s/statements' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    """POST ``{"code": statement}`` to ``sessions/<uuid>/inspect``."""
    body = json.dumps({'code': statement})
    return self._root.post('sessions/%s/inspect' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    """GET ``sessions/<session>/statements/<statement>``."""
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    """POST to ``sessions/<session>/interrupt``."""
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    """DELETE ``sessions/<uuid>``."""
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    """GET the ``batches`` resource."""
    return self._root.get('batches')

  def submit_batch(self, properties):
    """POST a new batch; sets ``proxyUser`` on the passed-in ``properties`` dict."""
    properties['proxyUser'] = self.user
    body = json.dumps(properties)
    return self._root.post('batches', data=body, contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    """GET ``batches/<uuid>``."""
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    """Return the ``state`` entry of ``batches/<uuid>/state``."""
    reply = self._root.get('batches/%s/state' % uuid)
    return reply['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the log lines of batch ``uuid`` joined with newlines.

    ``startFrom`` and ``size`` are sent as ``from`` / ``size`` only when not None.
    """
    window = {}
    for name, value in (('from', startFrom), ('size', size)):
      if value is not None:
        window[name] = value

    reply = self._root.get('batches/%s/log' % uuid, params=window)
    return '\n'.join(reply['log'])

  def close_batch(self, uuid):
    """DELETE ``batches/<uuid>``."""
    return self._root.delete('batches/%s' % uuid)
Ejemplo n.º 38
0
class HistoryServerApi(object):
    """Client for the MapReduce history resources under ``ws/<version>/history``.

    Every query asks for JSON via the ``Accept`` header.
    """

    def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
        history_path = 'ws/%s/history' % _API_VERSION
        self._url = posixpath.join(oozie_url, history_path)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "HistoryServerApi at %s" % (self._url, )

    @property
    def url(self):
        """Base URL of the history resources."""
        return self._url

    def _get_json(self, relpath):
        """GET ``relpath`` below the base URL, requesting a JSON response."""
        return self._root.get(relpath, headers={'Accept': _JSON_CONTENT_TYPE})

    def job(self, user, job_id):
        """Fetch one job. ``user`` is accepted but not used by this method."""
        return self._get_json('mapreduce/jobs/%s' % (job_id,))

    def counters(self, job_id):
        """Fetch the counters of a job."""
        return self._get_json('mapreduce/jobs/%s/counters' % (job_id,))

    def conf(self, job_id):
        """Fetch the configuration of a job."""
        return self._get_json('mapreduce/jobs/%s/conf' % (job_id,))

    def job_attempts(self, job_id):
        """Fetch the attempts of a job."""
        return self._get_json('mapreduce/jobs/%s/jobattempts' % (job_id,))

    def tasks(self, job_id):
        """Fetch the tasks of a job."""
        return self._get_json('mapreduce/jobs/%s/tasks' % (job_id,))

    def task(self, job_id, task_id):
        """Fetch a single task of a job."""
        return self._get_json('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id))

    def task_attempts(self, job_id, task_id):
        """Fetch the attempts of a task."""
        return self._get_json('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id))

    def task_counters(self, job_id, task_id):
        """Fetch the counters of a task.

        Unlike the sibling methods, every ``application`` substring in
        ``job_id`` is replaced with ``job`` before the path is built.
        """
        mr_job_id = job_id.replace('application', 'job')
        return self._get_json('mapreduce/jobs/%s/tasks/%s/counters' % (mr_job_id, task_id))

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch a single attempt of a task."""
        return self._get_json(
            'mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id))

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """Fetch the counters of a single task attempt."""
        return self._get_json(
            'mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id))
Ejemplo n.º 39
0
class SparkHistoryServerApi(object):
  """Client for the REST resources exposed under ``<spark_hs_url>/api/<version>/``.

  Also provides helpers that follow executor log links and extract the log
  text from the returned HTML page.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    """Base URL of the REST API."""
    return self._url

  @property
  def ui_url(self):
    """URL the client was created with (without the API suffix)."""
    return self._ui_url

  @property
  def headers(self):
    """Headers sent with every API query: a JSON ``Accept`` header."""
    return {'Accept': _JSON_CONTENT_TYPE}

  def _query(self, relpath):
    """GET ``relpath`` below the API root with the standard headers."""
    return self._root.get(relpath, headers=self.headers)

  def applications(self):
    """List applications."""
    return self._query('applications')

  def application(self, app_id):
    """Fetch one application."""
    return self._query('applications/%s' % (app_id,))

  def jobs(self, app_id):
    """List the jobs of an application."""
    return self._query('applications/%s/jobs' % (app_id,))

  def stages(self, app_id):
    """List the stages of an application."""
    return self._query('applications/%s/stages' % (app_id,))

  def executors(self, job):
    """List the executors of ``job``; returns ``[]`` when no app id is resolved."""
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    real_app_id = self.get_real_app_id(job)
    if real_app_id:
      return self._query('applications/%s/executors' % (real_app_id,))
    return []

  def stage_attempts(self, app_id, stage_id):
    """List the attempts of a stage."""
    return self._query('applications/%s/stages/%s' % (app_id, stage_id))

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    """Fetch one stage attempt."""
    return self._query('applications/%s/stages/%s/%s' % (app_id, stage_id, stage_attempt_id))

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    """Fetch the task summary of a stage attempt."""
    return self._query('applications/%s/stages/%s/%s/taskSummary' % (app_id, stage_id, stage_attempt_id))

  def task_list(self, app_id, stage_id, stage_attempt_id):
    """Fetch the task list of a stage attempt."""
    return self._query('applications/%s/stages/%s/%s/taskList' % (app_id, stage_id, stage_attempt_id))

  def storages(self, app_id):
    """List the stored RDDs of an application."""
    return self._query('applications/%s/storage/rdd' % (app_id,))

  def storage(self, app_id, rdd_id):
    """Fetch one stored RDD."""
    return self._query('applications/%s/storage/rdd/%s' % (app_id, rdd_id))

  def download_logs(self, app_id):
    """Fetch the ``logs`` resource of an application."""
    return self._query('applications/%s/logs' % (app_id,))

  def download_attempt_logs(self, app_id, attempt_id):
    """Fetch the ``logs`` resource of one application attempt."""
    return self._query('applications/%s/%s/logs' % (app_id, attempt_id))

  def download_executors_logs(self, request, job, name, offset):
    """Return the log named ``name`` of the executor chosen by ``get_executors_loglinks``."""
    links = self.get_executors_loglinks(job)
    return self.retrieve_log_content(links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    """Return the log named ``name`` using the links in ``executor['logs']``."""
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """Download one log page and return the text of its second table cell.

    ``log_name`` falls back to ``'stdout'`` unless it is exactly ``'stderr'``.
    Returns ``''`` when ``log_links`` is empty or has no entry for the name.
    """
    params = {'doAs': username}
    if offset != 0:
      params['start'] = offset

    if log_name != 'stderr':
      log_name = 'stdout'

    if not log_links or log_name not in log_links:
      return ''

    log_link = log_links[log_name]
    # The link is absolute, so a dedicated client/resource is built for it.
    root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
    response = root.get('', params=params)
    page = html.fromstring(response, parser=html.HTMLParser())
    return page.xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()

  def get_executors_loglinks(self, job):
    """Return element 12 of the chosen executor row in ``job.metrics``, or None.

    The row whose first element is ``'driver'`` is preferred; otherwise the
    first row is used.
    """
    if not (job.metrics and 'executors' in job.metrics and job.metrics['executors']):
      return None

    rows = job.metrics['executors']
    drivers = [row for row in rows if row[0] == 'driver']  # look up driver executor
    chosen = drivers[0] if drivers else rows[0]

    return chosen[12] if chosen else None

  def get_real_app_id(self, job):
    """Resolve the application id to use in API paths for ``job``.

    Returns ``{}`` when the job is not listed by ``applications()`` and
    ``None`` when anything raises while resolving.
    """
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running a job in cluster mode, an attempt number is part of the application ID, but the proxy URL can't be resolved
    # to match the Spark history URL. In the applications list, each job's attempt list shows whether an attempt ID is used
    # and how many attempts there are.

    try:
      matching = [entry for entry in self.applications() if entry['id'] == job.jobId]

      if not matching:
        return {}

      attempts = matching[0]['attempts']

      if len(attempts) != 1:
        app_id = job.jobId + '/%d' % len(attempts)
      elif 'attemptId' in attempts[0]:
        app_id = job.jobId + '/' + attempts[0]['attemptId']
      else:
        app_id = job.jobId

      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None

    return app_id
Ejemplo n.º 40
0
class JobServerApi(object):
    """REST client for the sessions and batches resources of a job server.

    The acting user is kept in thread-local storage (see ``setuser``) and is
    sent as ``proxyUser`` when creating sessions or submitting batches.
    """

    def __init__(self, oozie_url):
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """URL the client was created with."""
        return self._url

    @property
    def security_enabled(self):
        """Always the value set in ``__init__`` (``False``)."""
        return self._security_enabled

    @property
    def user(self):
        """User stored for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store ``user.username`` when available, otherwise ``user`` itself."""
        self._thread_local.user = user.username if hasattr(user, "username") else user

    def get_status(self):
        """GET the ``sessions`` resource."""
        return self._root.get("sessions")

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the log lines of session ``uuid`` joined with newlines.

        ``startFrom`` and ``size`` are sent as ``from`` / ``size`` only when
        they are not None.
        """
        query = {}
        for key, value in (("from", startFrom), ("size", size)):
            if value is not None:
                query[key] = value

        reply = self._root.get("sessions/%s/log" % uuid, params=query)
        return "\n".join(reply["log"])

    def create_session(self, **properties):
        """POST a new session; ``proxyUser`` is always set to the current user."""
        properties["proxyUser"] = self.user
        payload = json.dumps(properties)
        return self._root.post("sessions", data=payload, contenttype="application/json")

    def get_session(self, uuid):
        """GET ``sessions/<uuid>``."""
        return self._root.get("sessions/%s" % uuid)

    def submit_statement(self, uuid, statement):
        """POST ``{"code": statement}`` to ``sessions/<uuid>/statements``."""
        payload = json.dumps({"code": statement})
        return self._root.post("sessions/%s/statements" % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``{"code": statement}`` to ``sessions/<uuid>/inspect``."""
        payload = json.dumps({"code": statement})
        return self._root.post("sessions/%s/inspect" % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """GET ``sessions/<session>/statements/<statement>``."""
        return self._root.get("sessions/%s/statements/%s" % (session, statement))

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        return self._root.post("sessions/%s/interrupt" % session)

    def close(self, uuid):
        """DELETE ``sessions/<uuid>``."""
        return self._root.delete("sessions/%s" % uuid)

    def get_batches(self):
        """GET the ``batches`` resource."""
        return self._root.get("batches")

    def submit_batch(self, properties):
        """POST a new batch; sets ``proxyUser`` on the passed-in ``properties`` dict."""
        properties["proxyUser"] = self.user
        payload = json.dumps(properties)
        return self._root.post("batches", data=payload, contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """GET ``batches/<uuid>``."""
        return self._root.get("batches/%s" % uuid)

    def get_batch_status(self, uuid):
        """Return the ``state`` entry of ``batches/<uuid>/state``."""
        reply = self._root.get("batches/%s/state" % uuid)
        return reply["state"]

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the log lines of batch ``uuid`` joined with newlines.

        ``startFrom`` and ``size`` are sent as ``from`` / ``size`` only when
        they are not None.
        """
        query = {}
        for key, value in (("from", startFrom), ("size", size)):
            if value is not None:
                query[key] = value

        reply = self._root.get("batches/%s/log" % uuid, params=query)
        return "\n".join(reply["log"])

    def close_batch(self, uuid):
        """DELETE ``batches/<uuid>``."""
        return self._root.delete("batches/%s" % uuid)
Ejemplo n.º 41
0
class ImpalaDaemonApi(object):
    """Client for the JSON endpoints of an Impala daemon's web server.

    Authentication is chosen once in ``__init__``: configured username and
    password (basic or digest) take precedence over Kerberos/SPNEGO.
    """

    def __init__(self, server_url):
        self._url = server_url
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = is_kerberos_enabled()
        self._webserver_spnego_enabled = is_webserver_spnego_enabled()
        self._thread_local = threading.local()

        # You can set username/password for Impala Web UI which overrides kerberos
        if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
            if DAEMON_API_AUTH_SCHEME.get().lower() == 'basic':
                self._client.set_basic_auth(DAEMON_API_USERNAME.get(),
                                            DAEMON_API_PASSWORD.get())
                LOG.info("Using username and password for basic authentication")
            else:
                self._client.set_digest_auth(DAEMON_API_USERNAME.get(),
                                             DAEMON_API_PASSWORD.get())
                LOG.info('Using username and password for digest authentication')
        elif self._webserver_spnego_enabled or self._security_enabled:
            self._client.set_kerberos_auth()
            LOG.info('Using kerberos principal for authentication')

    def __str__(self):
        return "ImpalaDaemonApi at %s" % self._url

    @property
    def url(self):
        """URL of the daemon's web server."""
        return self._url

    @property
    def security_enabled(self):
        """Result of ``is_kerberos_enabled()`` at construction time."""
        return self._security_enabled

    @property
    def user(self):
        """User stored for the current thread by ``set_user``."""
        return self._thread_local.user

    def set_user(self, user):
        """Store ``user.username`` when available, otherwise ``user`` itself."""
        if hasattr(user, 'username'):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def _get_json(self, relpath, params, error_prefix='ImpalaDaemonApi'):
        """GET ``relpath`` and decode the response when it is still text.

        A response that is not a string is returned unchanged. Raises
        ImpalaDaemonApiException, with a message starting with
        ``error_prefix``, when the text is not valid JSON.
        """
        resp = self._root.get(relpath, params=params)

        # `basestring` exists only on Python 2; fall back to the Python 3
        # text/bytes types so the check works on both.
        try:
            text_types = basestring
        except NameError:
            text_types = (str, bytes)

        if not isinstance(resp, text_types):
            return resp
        try:
            return json.loads(resp)
        except ValueError as e:
            raise ImpalaDaemonApiException(
                '%s did not return valid JSON: %s' % (error_prefix, e))

    def get_queries(self):
        """Return the daemon's ``queries`` listing."""
        return self._get_json('queries', {'json': 'true'})

    def get_query(self, query_id):
        """Return the ``query_plan`` data of ``query_id``."""
        return self._get_json('query_plan', {'query_id': query_id, 'json': 'true'})

    def get_query_profile(self, query_id):
        """Return the ``query_profile`` data of ``query_id``."""
        return self._get_json('query_profile',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi query_profile')

    def get_query_memory(self, query_id):
        """Return the ``query_memory`` data of ``query_id``."""
        return self._get_json('query_memory',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi query_memory')

    def kill(self, query_id):
        """Request cancellation of ``query_id`` via ``cancel_query``."""
        return self._get_json('cancel_query',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi kill')

    def get_query_backends(self, query_id):
        """Return the ``query_backends`` data of ``query_id``."""
        return self._get_json('query_backends',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi query_backends')

    def get_query_finstances(self, query_id):
        """Return the ``query_finstances`` data of ``query_id``."""
        return self._get_json('query_finstances',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi query_finstances')

    def get_query_summary(self, query_id):
        """Return the ``query_summary`` data of ``query_id``."""
        return self._get_json('query_summary',
                              {'query_id': query_id, 'json': 'true'},
                              'ImpalaDaemonApi query_summary')

    def get_query_profile_encoded(self, query_id):
        """Return the raw ``query_profile_encoded`` response (no JSON decoding)."""
        params = {'query_id': query_id}

        return self._root.get('query_profile_encoded', params=params)
Ejemplo n.º 42
0
class JobServerApi(object):
    """REST client for a job server exposing jobs, contexts and jars resources.

    The acting user is kept in thread-local storage (see ``setuser``).
    """

    def __init__(self, oozie_url):
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        """URL the client was created with."""
        return self._url

    @property
    def security_enabled(self):
        """Always the value set in ``__init__`` (``False``)."""
        return self._security_enabled

    @property
    def user(self):
        """User stored for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store ``user.username`` when available, otherwise ``user`` itself."""
        self._thread_local.user = user.username if hasattr(user, 'username') else user

    def get_status(self, **kwargs):
        """GET ``healthz`` (text response requested); kwargs become query parameters."""
        accept = {'Accept': _TEXT_CONTENT_TYPE}
        return self._root.get('healthz', params=kwargs, headers=accept)

    def submit_job(self, appName, classPath, data, context=None, sync=False):
        """POST ``data`` to ``jobs`` with the job description as query parameters.

        ``context`` is only sent when truthy.
        """
        query = {'appName': appName, 'classPath': classPath, 'sync': sync}
        if context:
            query['context'] = context
        return self._root.post('jobs', params=query, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def job(self, job_id):
        """GET ``jobs/<job_id>`` as JSON."""
        accept = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get('jobs/%s' % job_id, headers=accept)

    def jobs(self, **kwargs):
        """GET ``jobs`` as JSON; kwargs become query parameters."""
        accept = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get('jobs', params=kwargs, headers=accept)

    def create_context(self, name, **kwargs):
        """POST to ``contexts/<name>``; kwargs become query parameters."""
        return self._root.post('contexts/%s' % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

    def contexts(self, **kwargs):
        """GET ``contexts`` as JSON; kwargs become query parameters."""
        accept = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get('contexts', params=kwargs, headers=accept)

    def delete_context(self, name, **kwargs):
        """DELETE ``contexts/<name>``."""
        # NOTE(review): kwargs is accepted but never forwarded -- confirm
        # whether it was meant to be sent as query parameters.
        return self._root.delete('contexts/%s' % name)

    def upload_jar(self, app_name, data):
        """POST ``data`` to ``jars/<app_name>``."""
        return self._root.post('jars/%s' % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def jars(self, **kwargs):
        """GET ``jars`` as JSON; kwargs become query parameters."""
        accept = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get('jars', params=kwargs, headers=accept)
Ejemplo n.º 43
0
        log_link = attempt['logsLink']
    except (KeyError, RestException), e:
        raise KeyError(
            _("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)

    link = '/%s/' % name
    params = {}
    if offset and int(offset) >= 0:
        params['start'] = offset

    root = Resource(get_log_client(log_link),
                    urlparse.urlsplit(log_link)[2],
                    urlencode=False)

    try:
        response = root.get(link, params=params)
        log = html.fromstring(response).xpath(
            '/html/body/table/tbody/tr/td[2]')[0].text_content()
    except Exception, e:
        log = _('Failed to retrieve log: %s') % e

    response = {'log': log}

    return HttpResponse(json.dumps(response), mimetype="application/json")


@check_job_permission
def job_single_logs(request, job):
    """
  Try to smartly detect the most useful task attempt (e.g. Oozie launcher, failed task) and get its MR logs.
  """
Ejemplo n.º 44
0
class OozieApi(object):
  """
  Client for the Oozie REST API rooted at ``<oozie_url>/<api_version>``.

  Every request carries the query parameters built by ``_get_params`` (``doAs``, ``timezone``
  and, when security is disabled, ``user.name``).
  """

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
  VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    """
    oozie_url: base URL of the server; ``api_version`` is joined onto it.
    user: a user name, or any object exposing a ``username`` attribute.
    security_enabled: when true, the HTTP client is switched to Kerberos authentication.
    ssl_cert_ca_verify: handed to the HTTP client's ``set_verify``.
    """
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    """URL (including the API version) the requests are sent to."""
    return self._url

  @property
  def security_enabled(self):
    """The ``security_enabled`` flag given to the constructor."""
    return self._security_enabled

  def _get_params(self):
    """
    Return a new dict with the query parameters sent along with every request.

    ``user.name`` is only included when security is disabled.
    """
    if self.security_enabled:
      return { 'doAs': self.user, 'timezone': TIME_ZONE.get() }
    return { 'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get() }

  def _get_oozie_properties(self, properties=None):
    """
    Return a new dict of job properties: ``user.name`` defaults to ``self.user`` and the entries of
    ``properties`` (if any) override the defaults.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  @staticmethod
  def _format_filters(filters, valid_keys, error_template):
    """
    Render ``filters`` as the ``key=val;key=val`` string used by the filter parameters.

    ``filters`` is an iterable of (key, value) pairs or a dict; None gives ''.
    Raise ValueError, built from ``error_template``, for a key that is not in ``valid_keys``.
    """
    if filters is None:
      return ''
    # A dict iterates over its keys only, so take its items explicitly.
    pairs = filters.items() if isinstance(filters, dict) else filters
    rendered = []
    for key, val in pairs:
      if key not in valid_keys:
        raise ValueError(error_template % (key,))
      rendered.append('%s=%s' % (key, val))
    return ';'.join(rendered)

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based.
    filters is an iterable of (key, value) pairs whose keys must be in VALID_JOB_FILTERS;
    ValueError is raised otherwise.
    Returns a WorkflowList for jobtype 'wf', a CoordinatorList for 'coord', a BundleList otherwise.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype
    params['filter'] = self._format_filters(
        filters, OozieApi.VALID_JOB_FILTERS, '"%s" is not a valid filter for selecting jobs')

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    """get_jobs() for jobtype 'wf'."""
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    """get_jobs() for jobtype 'coord'."""
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    """get_jobs() for jobtype 'bundle'."""
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """
    get_coordinator(jobid) -> Coordinator

    offset and cnt are sent as the 'offset' and 'len' parameters, 'order' is always 'desc'.
    filters is an iterable of (key, value) pairs (or a dict) whose keys must be in
    VALID_JOB_FILTERS; ValueError is raised otherwise.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    params.update({'order': 'desc'})
    params['filter'] = self._format_filters(
        filters, OozieApi.VALID_JOB_FILTERS, '"%s" is not a valid filter for selecting jobs')

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    """
    get_bundle(jobid) -> Bundle
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    logfilter is an iterable of (key, value) pairs whose keys must be in VALID_LOG_FILTERS;
    ValueError is raised otherwise.
    """
    params = self._get_params()
    params['show'] = 'log'
    params['logfilter'] = self._format_filters(
        logfilter, OozieApi.VALID_LOG_FILTERS, '"%s" is not a valid filter for job logs')
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_status(self, jobid):
    """Return the response of the job request made with show=status."""
    params = self._get_params()
    params['show'] = 'status'

    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """
    get_action(action_id) -> CoordinatorAction, BundleAction or WorkflowAction

    The class is picked from the id: 'C@' selects a coordinator action, 'B@' a bundle action,
    anything else a workflow action.
    """
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> response of the PUT request

    properties are merged over the defaults of _get_oozie_properties and sent as the request body;
    parameters are extra query parameters.
    Raise ValueError for an unknown action, RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params,  data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None) -> jobid

    Raise RestException on error.
    """
    properties = self._get_oozie_properties(properties)

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    """Post the job like submit_job() but with action=dryrun; return the response as is."""
    properties = self._get_oozie_properties(properties)

    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    """
    Send action=rerun for jobid and return the response of the PUT request.

    params are extra query parameters. They are applied on top of the defaults from _get_params
    and the caller's dict is left untouched.
    """
    properties = self._get_oozie_properties(properties)
    request_params = self._get_params()
    if params is not None:
      # Caller-supplied values win, but the defaults (doAs, timezone, ...) are still sent.
      request_params.update(params)

    request_params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, request_params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    """Return the response of admin/instrumentation."""
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    """Return the response of admin/metrics."""
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    Return the 'slaSummaryList' entry of the sla response. Keyword arguments become the filter:

    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    # items() rather than iteritems() so the method is not tied to Python 2.
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.items()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
Ejemplo n.º 45
0
    def get_task_log(self, offset=0):
        """
        Collect the stdout, stderr and syslog text for this object from the log pages behind
        the job's ``logsLink``.

        The ``logsLink`` of the job's last attempt is rewritten with this object's own node,
        container and attempt attributes when they are present. Each of the three log pages is
        then fetched and the text of the first node matching
        ``/html/body/table/tbody/tr/td[2]`` is kept. A log that cannot be fetched or parsed is
        replaced by a 'Failed to retrieve log' message.

        ``offset`` is sent as the ``start`` parameter (0 when ``int(offset)`` is 0).
        """
        # NOTE(review): ``logs`` is filled but never returned in the visible code -- confirm
        # whether a trailing ``return logs`` was lost.
        logs = []
        attempt = self.task.job.job_attempts['jobAttempt'][-1]
        log_link = attempt['logsLink']

        # Generate actual task log link from logsLink url
        if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'
                                    ) or self.type == 'Oozie Launcher':
            logs_path = '/node/containerlogs/'
            node_url, tracking_path = log_link.split(logs_path)
            container_id, user = tracking_path.strip('/').split('/')

            # Replace log path tokens with actual container properties if available
            if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                node_url = '%s://%s' % (node_url.split('://')[0],
                                        self.nodeHttpAddress)
            container_id = self.assignedContainerId if hasattr(
                self, 'assignedContainerId') else container_id

            log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
                'node_url': node_url,
                'logs_path': logs_path.strip('/'),
                'container': container_id,
                'user': user
            }
        else:  # Completed jobs
            logs_path = '/jobhistory/logs/'
            root_url, tracking_path = log_link.split(logs_path)
            node_url, container_id, attempt_id, user = tracking_path.strip(
                '/').split('/')

            # Replace log path tokens with actual attempt properties if available
            if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0],
                                      attempt['nodeId'].split(':')[1])
            container_id = self.assignedContainerId if hasattr(
                self, 'assignedContainerId') else container_id
            attempt_id = self.attemptId if hasattr(self,
                                                   'attemptId') else attempt_id

            log_link = '%(root_url)s/%(logs_path)s/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
                'root_url': root_url,
                'logs_path': logs_path.strip('/'),
                'node': node_url,
                'container': container_id,
                'attempt': attempt_id,
                'user': user
            }

        for name in ('stdout', 'stderr', 'syslog'):
            link = '/%s/' % name
            if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED':  # Yarn currently dumps with 500 error with doas in running state
                params = {}
            else:
                params = {'doAs': user}

            params['start'] = offset if int(offset) != 0 else 0

            response = None
            try:
                log_link = re.sub('job_[^/]+', self.id, log_link)
                root = Resource(get_log_client(log_link),
                                urlparse.urlsplit(log_link)[2],
                                urlencode=False)
                response = root.get(link, params=params)
                log = html.fromstring(
                    response, parser=html.HTMLParser()).xpath(
                        '/html/body/table/tbody/tr/td[2]')[0].text_content()
            except Exception as e:
                # Translate the template first, then interpolate; formatting inside _() would
                # look up the already formatted text, which never matches a catalogue entry.
                log = _('Failed to retrieve log: %s') % e
                try:
                    debug_info = '\nLog Link: %s' % log_link
                    if response:
                        debug_info += '\nHTML Response: %s' % response
                    LOG.error(debug_info)
                except Exception:
                    LOG.exception('failed to build debug info')

            logs.append(log)
Ejemplo n.º 46
0
class JobServerApi(object):
    """
    REST client for a job server exposing ``sessions`` and ``batches`` resources.

    The acting user is stored per thread (see ``setuser``) and sent as ``proxyUser`` when a
    session or a batch is created.
    """

    def __init__(self, oozie_url):
        """Build the HTTP client and the root resource for the server at ``oozie_url``."""
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        # Holds the user name set by setuser(), separately for each thread.
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        """URL the requests are sent to."""
        return self._url

    @property
    def security_enabled(self):
        """Always False for this client (set in the constructor)."""
        return self._security_enabled

    @property
    def user(self):
        """
        User name set by setuser() in the current thread.

        Raises AttributeError when setuser() has not been called in this thread.
        """
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` (a name, or an object with a ``username``) for the current thread."""
        if hasattr(user, 'username'):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    @staticmethod
    def _log_window(startFrom, size):
        """Return the ``from`` / ``size`` request parameters, leaving out the ones that are None."""
        params = {}

        if startFrom is not None:
            params['from'] = startFrom

        if size is not None:
            params['size'] = size

        return params

    def get_status(self):
        """Return the response of ``sessions``."""
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the ``log`` lines of session ``uuid`` joined with newlines."""
        response = self._root.get('sessions/%s/log' % uuid,
                                  params=self._log_window(startFrom, size))

        return '\n'.join(response['log'])

    def create_session(self, **properties):
        """POST the keyword arguments, plus ``proxyUser``, as JSON to ``sessions``."""
        properties['proxyUser'] = self.user
        return self._root.post('sessions',
                               data=json.dumps(properties),
                               contenttype=_JSON_CONTENT_TYPE)

    def get_session(self, uuid):
        """Return the response of ``sessions/<uuid>``."""
        return self._root.get('sessions/%s' % uuid)

    def submit_statement(self, uuid, statement):
        """POST ``statement`` as the ``code`` of a new statement in session ``uuid``."""
        data = {'code': statement}
        return self._root.post('sessions/%s/statements' % uuid,
                               data=json.dumps(data),
                               contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``statement`` as ``code`` to ``sessions/<uuid>/inspect``."""
        data = {'code': statement}
        return self._root.post('sessions/%s/inspect' % uuid,
                               data=json.dumps(data),
                               contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """Return the response of ``sessions/<session>/statements/<statement>``."""
        return self._root.get('sessions/%s/statements/%s' %
                              (session, statement))

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        """DELETE ``sessions/<uuid>``."""
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        """Return the response of ``batches``."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """
        POST ``properties``, plus ``proxyUser``, as JSON to ``batches``.

        The caller's dict is not modified.
        """
        payload = dict(properties)
        payload['proxyUser'] = self.user
        return self._root.post('batches',
                               data=json.dumps(payload),
                               contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """Return the response of ``batches/<uuid>``."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return the ``state`` entry of ``batches/<uuid>/state``."""
        response = self._root.get('batches/%s/state' % uuid)
        return response['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the ``log`` lines of batch ``uuid`` joined with newlines."""
        response = self._root.get('batches/%s/log' % uuid,
                                  params=self._log_window(startFrom, size))

        return '\n'.join(response['log'])

    def close_batch(self, uuid):
        """DELETE ``batches/<uuid>``."""
        return self._root.delete('batches/%s' % uuid)
Ejemplo n.º 47
0
class MapreduceApi(object):
  """
  Client for the MapReduce job REST resources found under
  ``<oozie_url>/proxy/<application id>/ws/<version>/mapreduce/jobs/<job id>``.

  Every method accepts the id in either its ``job...`` or its ``application...`` form.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    security_enabled: when true, the HTTP client is switched to Kerberos authentication.
    ssl_cert_ca_verify: handed to the HTTP client's ``set_verify``.
    """
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    """URL (ending in ``proxy``) the requests are sent to."""
    return self._url

  @staticmethod
  def _split_ids(job_id):
    """Return ``(app_id, job_id)`` whichever of the two forms was passed in."""
    return job_id.replace('job', 'application'), job_id.replace('application', 'job')

  def _get_job_resource(self, job_id, suffix=''):
    """GET the job's resource, optionally followed by ``suffix``, with the Accept header set to ``_JSON_CONTENT_TYPE``."""
    app_id, job_id = self._split_ids(job_id)
    path = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get(path + suffix, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    """Return the job resource. ``user`` is accepted but not used."""
    return self._get_job_resource(job_id)

  def counters(self, job_id):
    """Return the job's ``counters`` resource, or None when the response is a string."""
    response = self._get_job_resource(job_id, '/counters')
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    """Return the job's ``tasks`` resource."""
    return self._get_job_resource(job_id, '/tasks')

  def job_attempts(self, job_id):
    """Return the job's ``jobattempts`` resource."""
    return self._get_job_resource(job_id, '/jobattempts')

  def conf(self, job_id):
    """Return the job's ``conf`` resource."""
    return self._get_job_resource(job_id, '/conf')

  def task(self, job_id, task_id):
    """Return the resource of one task of the job."""
    return self._get_job_resource(job_id, '/tasks/%s' % task_id)

  def task_counters(self, job_id, task_id):
    """Return the ``counters`` resource of one task of the job."""
    return self._get_job_resource(job_id, '/tasks/%s/counters' % task_id)

  def task_attempts(self, job_id, task_id):
    """Return the ``attempts`` resource of one task of the job."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts' % task_id)

  def task_attempt(self, job_id, task_id, attempt_id):
    """Return the resource of one attempt of one task of the job."""
    return self._get_job_resource(job_id, '/tasks/%s/attempts/%s' % (task_id, attempt_id))

  def kill(self, job_id):
    """Ask the resource manager to kill the application behind ``job_id``."""
    app_id = self._split_ids(job_id)[0]
    get_resource_manager().kill(app_id) # We need to call the RM
Ejemplo n.º 48
0
  except (KeyError, RestException), e:
    raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)
  except Exception, e:
    raise Exception(_("Failed to get application for job %s: %s") % (job.jobId, e))

  if log_link:
    link = '/%s/' % name
    params = {}
    if offset != 0:
      params['start'] = offset

    root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
    api_resp = None

    try:
      api_resp = root.get(link, params=params)
      log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()

      response['status'] = 0
      response['log'] = LinkJobLogs._make_hdfs_links(log)
    except Exception, e:
      response['log'] = _('Failed to retrieve log: %s' % e)
      try:
        debug_info = '\nLog Link: %s' % log_link
        if api_resp:
          debug_info += '\nHTML Response: %s' % response
        response['debug'] = debug_info
        LOG.error(debug_info)
      except:
        LOG.exception('failed to create debug info')
Ejemplo n.º 49
0
class OozieApi(object):
    """
    Client for the Oozie REST API rooted at ``<oozie_url>/<api_version>``.

    Every request carries the query parameters built by ``_get_params`` (``doAs``,
    ``timezone`` and, when security is disabled, ``user.name``).
    """

    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
    VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

    def __init__(self,
                 oozie_url,
                 user,
                 security_enabled=False,
                 api_version=API_VERSION,
                 ssl_cert_ca_verify=True):
        """
        oozie_url: base URL of the server; ``api_version`` is joined onto it.
        user: a user name, or any object exposing a ``username`` attribute.
        security_enabled: when true, the HTTP client is switched to Kerberos authentication.
        ssl_cert_ca_verify: handed to the HTTP client's ``set_verify``.
        """
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        if hasattr(user, 'username'):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url, )

    @property
    def url(self):
        """URL (including the API version) the requests are sent to."""
        return self._url

    @property
    def security_enabled(self):
        """The ``security_enabled`` flag given to the constructor."""
        return self._security_enabled

    def _get_params(self):
        """
        Return a new dict with the query parameters sent along with every request.

        ``user.name`` is only included when security is disabled.
        """
        if self.security_enabled:
            return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
        return {
            'user.name': DEFAULT_USER,
            'doAs': self.user,
            'timezone': TIME_ZONE.get()
        }

    def _get_oozie_properties(self, properties=None):
        """
        Return a new dict of job properties: ``user.name`` defaults to ``self.user`` and the
        entries of ``properties`` (if any) override the defaults.
        """
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        return defaults

    @staticmethod
    def _format_filters(filters, valid_keys, error_template):
        """
        Render ``filters`` as the ``key=val;key=val`` string used by the filter parameters.

        ``filters`` is an iterable of (key, value) pairs or a dict; None gives ''.
        Raise ValueError, built from ``error_template``, for a key that is not in
        ``valid_keys``.
        """
        if filters is None:
            return ''
        # A dict iterates over its keys only, so take its items explicitly.
        pairs = filters.items() if isinstance(filters, dict) else filters
        rendered = []
        for key, val in pairs:
            if key not in valid_keys:
                raise ValueError(error_template % (key, ))
            rendered.append('%s=%s' % (key, val))
        return ';'.join(rendered)

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
        Get a list of Oozie jobs.

        Note that offset is 1-based.
        filters is an iterable of (key, value) pairs whose keys must be in VALID_JOB_FILTERS;
        ValueError is raised otherwise.
        Returns a WorkflowList for jobtype 'wf', a CoordinatorList for 'coord', a BundleList
        otherwise.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params['jobtype'] = jobtype
        params['filter'] = self._format_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')

        # Send the request
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        """get_jobs() for jobtype 'wf'."""
        return self.get_jobs('wf', offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        """get_jobs() for jobtype 'coord'."""
        return self.get_jobs('coord', offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        """get_jobs() for jobtype 'bundle'."""
        return self.get_jobs('bundle', offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        """
        get_coordinator(jobid) -> Coordinator

        offset and cnt are sent as the 'offset' and 'len' parameters, 'order' is always 'desc'.
        filters is an iterable of (key, value) pairs (or a dict) whose keys must be in
        VALID_JOB_FILTERS; ValueError is raised otherwise.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        params.update({'order': 'desc'})
        params['filter'] = self._format_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')

        resp = self._root.get('job/%s' % (jobid, ), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        """
        get_bundle(jobid) -> Bundle
        """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params['show'] = 'definition'
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_log(self, jobid, logfilter=None):
        """
        get_job_log(jobid) -> Log (xml string)

        logfilter is an iterable of (key, value) pairs whose keys must be in VALID_LOG_FILTERS;
        ValueError is raised otherwise.
        """
        params = self._get_params()
        params['show'] = 'log'
        params['logfilter'] = self._format_filters(
            logfilter, OozieApi.VALID_LOG_FILTERS,
            '"%s" is not a valid filter for job logs')
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_status(self, jobid):
        """Return the response of the job request made with show=status."""
        params = self._get_params()
        params['show'] = 'status'

        xml = self._root.get('job/%s' % (jobid, ), params)
        return xml

    def get_action(self, action_id):
        """
        get_action(action_id) -> CoordinatorAction, BundleAction or WorkflowAction

        The class is picked from the id: 'C@' selects a coordinator action, 'B@' a bundle
        action, anything else a workflow action.
        """
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id, ), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> response of the PUT request

        properties are merged over the defaults of _get_oozie_properties and sent as the
        request body; parameters are extra query parameters.
        Raise ValueError for an unknown action, RestException on error.
        """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                          'coord-rerun', 'bundle-rerun', 'change', 'ignore',
                          'update'):
            msg = 'Invalid oozie job action: %s' % (action, )
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put('job/%s' % jobid,
                              params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        Raise RestException on error.
        """
        defaults = {
            'oozie.wf.application.path': application_path,
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None) -> jobid

        Raise RestException on error.
        """
        properties = self._get_oozie_properties(properties)

        params = self._get_params()
        resp = self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def dryrun(self, properties=None):
        """Post the job like submit_job() but with action=dryrun; return the response as is."""
        properties = self._get_oozie_properties(properties)

        params = self._get_params()
        params['action'] = 'dryrun'
        return self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        """
        Send action=rerun for jobid and return the response of the PUT request.

        params are extra query parameters. They are applied on top of the defaults from
        _get_params and the caller's dict is left untouched.
        """
        properties = self._get_oozie_properties(properties)
        request_params = self._get_params()
        if params is not None:
            # Caller-supplied values win, but the defaults (doAs, timezone, ...) are still sent.
            request_params.update(params)

        request_params['action'] = 'rerun'

        return self._root.put('job/%s' % jobid,
                              request_params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        """Return the response of admin/instrumentation."""
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_metrics(self):
        """Return the response of admin/metrics."""
        params = self._get_params()
        resp = self._root.get('admin/metrics', params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
        Return the 'slaSummaryList' entry of the sla response. Keyword arguments become the
        filter:

        filter=
          app_name=my-sla-app
          id=0000002-131206135002457-oozie-oozi-W
          nominal_start=2013-06-18T00:01Z
          nominal_end=2013-06-23T00:01Z
        """
        params = self._get_params()
        # items() rather than iteritems() so the method is not tied to Python 2.
        params['filter'] = ';'.join(
            ['%s=%s' % (key, val) for key, val in kwargs.items()])
        resp = self._root.get('sla', params)
        return resp['slaSummaryList']
Ejemplo n.º 50
0
class MapreduceApi(object):
    """
    REST client for MapReduce job resources served under ``<oozie_url>/proxy``.

    Every job-level call issues a GET, relative to that root, on
    ``<application id>/ws/<_API_VERSION>/mapreduce/jobs/<job id>[/<sub-resource>...]``
    and requests JSON through the ``Accept`` header. The result of each call
    is whatever ``Resource.get`` returns.

    Job ids may be given in either their ``job_...`` or ``application_...``
    form; both spellings are derived from the value passed in.
    """

    def __init__(self, oozie_url):
        """Build the HTTP client rooted at ``<oozie_url>/proxy``."""
        self._url = posixpath.join(oozie_url, 'proxy')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False

    def __str__(self):
        return "MapreduceApi at %s" % (self._url, )

    @property
    def url(self):
        """Base URL all requests are relative to."""
        return self._url

    def _get_job_resource(self, job_id, *segments):
        """GET the job resource for ``job_id``, optionally below ``segments``."""
        app_id = job_id.replace('job', 'application')
        # The jobs/<id> path segment needs the job_... spelling, so map an
        # application_... id back. This is a no-op for ids already in job form.
        mr_job_id = job_id.replace('application', 'job')
        path = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
            'app_id': app_id,
            'job_id': mr_job_id,
            'version': _API_VERSION
        }
        # %-format each segment so non-string ids are rendered the same way
        # the original per-method templates rendered them.
        path += ''.join('/%s' % segment for segment in segments)
        return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

    def job(self, user, job_id):
        """Fetch the job resource. ``user`` is accepted but not used here."""
        return self._get_job_resource(job_id)

    def counters(self, job_id):
        """Fetch the ``counters`` sub-resource of the job."""
        return self._get_job_resource(job_id, 'counters')

    def tasks(self, job_id):
        """Fetch the ``tasks`` sub-resource of the job."""
        return self._get_job_resource(job_id, 'tasks')

    def job_attempts(self, job_id):
        """Fetch the ``jobattempts`` sub-resource of the job."""
        return self._get_job_resource(job_id, 'jobattempts')

    def conf(self, job_id):
        """Fetch the ``conf`` sub-resource of the job."""
        return self._get_job_resource(job_id, 'conf')

    def task(self, job_id, task_id):
        """Fetch ``tasks/<task_id>`` of the job."""
        return self._get_job_resource(job_id, 'tasks', task_id)

    def task_counters(self, job_id, task_id):
        """Fetch ``tasks/<task_id>/counters`` of the job."""
        return self._get_job_resource(job_id, 'tasks', task_id, 'counters')

    def task_attempts(self, job_id, task_id):
        """Fetch ``tasks/<task_id>/attempts`` of the job."""
        return self._get_job_resource(job_id, 'tasks', task_id, 'attempts')

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch ``tasks/<task_id>/attempts/<attempt_id>`` of the job."""
        return self._get_job_resource(
            job_id, 'tasks', task_id, 'attempts', attempt_id)