Ejemplo n.º 1
0
class SparkJob(Application):

  def __init__(self, job, rm_api=None, hs_api=None):
    """Build a SparkJob and, when possible, load its metrics.

    `job` and `rm_api` are forwarded unchanged to Application.__init__.
    `hs_api` is optional; when it is truthy and the job status is past the
    NEW / SUBMITTED / ACCEPTED states, it is stored as
    `self.history_server_api` and `_get_metrics()` is called.
    """
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()

    # Nothing more to do for a job that is still in a pre-run state, or when
    # no history server API was supplied.
    if self.status in ('NEW', 'SUBMITTED', 'ACCEPTED') or not hs_api:
      return

    self.history_server_api = hs_api
    self._get_metrics()

  @property
  def logs_url(self):
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url())

      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally: