Exemple #1
0
    def _parse_configs(self):
        """Discover the configured shell types and cache their commands/envs.

        Populates self._command_by_short_name and self._env_by_short_name for
        every shell whose executable exists, and records all configured shells
        (with availability) in self.shell_types.
        """
        mr_cluster = get_cluster_conf_for_job_submission()
        mapred_home = mr_cluster.HADOOP_MAPRED_HOME.get() if mr_cluster is not None else None

        # One entry per configured shell type: nice name (e.g. "Python Shell"),
        # short name (e.g. "python"), and whether the executable is present.
        shell_types = []
        for short_name in shell.conf.SHELL_TYPES.keys():
            type_conf = shell.conf.SHELL_TYPES[short_name]
            # Seed the environment with HADOOP_MAPRED_HOME only when configured.
            env_for_shell = {constants.HADOOP_MAPRED_HOME: mapred_home} if mapred_home else {}
            command = type_conf.command.get().strip().split()
            nice_name = type_conf.nice_name.get().strip()
            available = utils.executable_exists(command)
            if available:
                self._command_by_short_name[short_name] = command
                conf_shell_env = type_conf.environment
                for env_variable in conf_shell_env.keys():
                    env_for_shell[env_variable] = conf_shell_env[env_variable].value.get()
                self._env_by_short_name[short_name] = env_for_shell
            shell_types.append({
                constants.NICE_NAME: nice_name,
                constants.KEY_NAME: short_name,
                constants.EXISTS: available,
            })
        self.shell_types = shell_types
Exemple #2
0
def get_log_client(log_link):
    """Return a cached HttpClient for the host serving ``log_link``.

    Clients are kept in a bounded min-heap of (last-use epoch time, client)
    tuples so that the least recently used client is evicted once
    MAX_HEAP_SIZE is reached. Thread-safe via _log_client_lock.
    """
    # Neither _log_client_queue nor MAX_HEAP_SIZE is assigned here, so the
    # stale `global` declarations were dropped; the lock is now held via
    # `with`, which guarantees release like the old try/finally did.
    with _log_client_lock:
        components = urlparse.urlsplit(log_link)
        base_url = '%(scheme)s://%(netloc)s' % {
            'scheme': components[0],
            'netloc': components[1]
        }

        # Takes on form (epoch time, client object)
        # Least Recently Used algorithm.
        client_tuple = next(
            (tup for tup in _log_client_heap if tup[1].base_url == base_url),
            None)
        if client_tuple is None:
            client = HttpClient(base_url, LOG)
            yarn_cluster = cluster.get_cluster_conf_for_job_submission()
            # Guard against a missing YARN config instead of raising
            # AttributeError on None (matches the other call sites).
            if yarn_cluster is not None and yarn_cluster.SECURITY_ENABLED.get():
                client.set_kerberos_auth()
        else:
            _log_client_heap.remove(client_tuple)
            client = client_tuple[1]

        new_client_tuple = (time.time(), client)
        if len(_log_client_heap) >= MAX_HEAP_SIZE:
            heapq.heapreplace(_log_client_heap, new_client_tuple)
        else:
            heapq.heappush(_log_client_heap, new_client_tuple)

        return client
Exemple #3
0
    def get_security(self):
        """Return the connection security settings for this query server.

        Returns:
            (use_sasl, mechanism, kerberos_principal_short_name,
             impersonation_enabled)

        Raises:
            Exception: if hive-site declares an unsupported HS2 mechanism.
        """
        principal = self.query_server["principal"]
        impersonation_enabled = False

        if principal:
            # Short name is the part before the first '/' of the principal.
            kerberos_principal_short_name = principal.split("/", 1)[0]
        else:
            kerberos_principal_short_name = None

        if self.query_server["server_name"] == "impala":
            cluster_conf = cluster.get_cluster_conf_for_job_submission()
            use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
            mechanism = HiveServerClient.HS2_MECHANISMS["KERBEROS"]
            impersonation_enabled = self.query_server["impersonation_enabled"]
        else:
            hive_mechanism = hive_site.get_hiveserver2_authentication()
            if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
                # Translate the raw template, then format: passing an
                # already-formatted string to _() defeats the catalog lookup.
                raise Exception(
                    _("%s server authentication not supported. Valid are %s.")
                    % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys())
                )
            use_sasl = hive_mechanism in ("KERBEROS", "NONE")
            mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
            impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

        return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled
  def get_security(cls, query_server):
    """Return (use_sasl, mechanism, kerberos_principal_short_name,
    impersonation_enabled) for the given query server configuration."""
    principal = query_server['principal']
    # Short name is everything before the first '/' of the principal.
    kerberos_principal_short_name = principal.split('/', 1)[0] if principal else None

    if query_server['server_name'] == 'impala':
      cluster_conf = cluster.get_cluster_conf_for_job_submission()
      use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
      mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
      impersonation_enabled = query_server['impersonation_enabled']
    else:
      hive_mechanism = hive_site.get_hiveserver2_authentication()
      if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
        raise Exception(_('%s server authentication not supported. Valid are %s.' % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys())))
      use_sasl = hive_mechanism in ('KERBEROS', 'NONE')
      # NOSASL unless a SASL mechanism is actually in use.
      mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism] if use_sasl else 'NOSASL'
      impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

    return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled
Exemple #5
0
    def get_security(self):
        """Return (use_sasl, mechanism, kerberos_principal_short_name,
        impersonation_enabled) for this client's query server."""
        principal = self.query_server['principal']
        kerberos_principal_short_name = principal.split('/', 1)[0] if principal else None

        if self.query_server['server_name'] != 'impala':
            hive_mechanism = hive_site.get_hiveserver2_authentication()
            if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
                raise Exception(
                    _('%s server authentication not supported. Valid are %s.' %
                      (hive_mechanism,
                       HiveServerClient.HS2_MECHANISMS.keys())))
            use_sasl = hive_mechanism in ('KERBEROS', 'NONE')
            mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
            impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()
        else:
            # Impala: SASL only when the cluster is secured; always Kerberos.
            cluster_conf = cluster.get_cluster_conf_for_job_submission()
            use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
            mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
            impersonation_enabled = self.query_server['impersonation_enabled']

        return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled
Exemple #6
0
def get_log_client(log_link):
  """Fetch (or create) the HttpClient for the host behind ``log_link``.

  Clients live in a bounded heap of (last-use time, client) tuples; the
  least recently used entry is evicted when the heap is full.
  """
  global _log_client_queue
  global MAX_HEAP_SIZE
  with _log_client_lock:
    components = urlparse.urlsplit(log_link)
    base_url = '%(scheme)s://%(netloc)s' % {
      'scheme': components[0],
      'netloc': components[1]
    }

    # Linear scan for an existing client for this base URL.
    found = None
    for entry in _log_client_heap:
      if entry[1].base_url == base_url:
        found = entry
        break

    if found is not None:
      # Re-inserted below with a fresh timestamp (LRU refresh).
      _log_client_heap.remove(found)
      client = found[1]
    else:
      client = HttpClient(base_url, LOG)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      if yarn_cluster.SECURITY_ENABLED.get():
        client.set_kerberos_auth()

    fresh_entry = (time.time(), client)
    if len(_log_client_heap) < MAX_HEAP_SIZE:
      heapq.heappush(_log_client_heap, fresh_entry)
    else:
      heapq.heapreplace(_log_client_heap, fresh_entry)

    return client
Exemple #7
0
  def get_security(self):
    """Return (use_sasl, mechanism, kerberos_principal_short_name,
    impersonation_enabled, ldap_username, ldap_password), supporting
    pass-through LDAP authentication when a bind password is configured."""
    principal = self.query_server['principal']
    impersonation_enabled = False
    ldap_password = get_ldap_password()
    # Pass-through LDAP authentication
    ldap_username = LDAP_USERNAME.get() if ldap_password is not None else None

    kerberos_principal_short_name = principal.split('/', 1)[0] if principal else None

    if self.query_server['server_name'] == 'impala':
      if ldap_password: # Force LDAP auth if ldap_password is provided
        use_sasl = True
        mechanism = HiveServerClient.HS2_MECHANISMS['NONE']
      else:
        cluster_conf = cluster.get_cluster_conf_for_job_submission()
        use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
        mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
      impersonation_enabled = self.query_server['impersonation_enabled']
    else:
      hive_mechanism = hive_site.get_hiveserver2_authentication()
      if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
        raise Exception(_('%s server authentication not supported. Valid are %s.') % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys()))
      use_sasl = hive_mechanism in ('KERBEROS', 'NONE', 'LDAP')
      mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
      impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

    return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, ldap_username, ldap_password
Exemple #8
0
def config_validator(user):
  """Warn when security is on but SQOOP_CONF_DIR does not exist on disk."""
  yarn_cluster = cluster.get_cluster_conf_for_job_submission()

  res = []
  if yarn_cluster.SECURITY_ENABLED.get() and not os.path.exists(SQOOP_CONF_DIR.get()):
    res.append((NICE_NAME, _t("The app won't work without a valid %s property.") % SQOOP_CONF_DIR.grab_key))
  return res
Exemple #9
0
  def get_log_link(self):
    """Return (log_link, user) for this task attempt.

    Rewrites the raw `logsLink` URL from the last job attempt into a
    fetchable log URL: NodeManager-style for running jobs, JobHistory-style
    for completed ones. Returns (falsy_link, None) when no link exists.
    """
    attempt = self.task.job.job_attempts['jobAttempt'][-1]
    log_link = attempt['logsLink']
    if not log_link:
      return log_link, None

    # Generate actual task log link from logsLink url
    if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
      logs_path = '/node/containerlogs/'
      node_url, tracking_path = log_link.split(logs_path)
      container_id, user = tracking_path.strip('/').split('/')

      # Replace log path tokens with actual container properties if available
      if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
        node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress)
      container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id

      log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
        'node_url': node_url,
        'logs_path': logs_path.strip('/'),
        'container': container_id,
        'user': user
      }
    else:  # Completed jobs
      if '/node/containerlogs/' in log_link:
        # Applications that use NodeManager API instead of JobHistory API, like new "Oozie Launcher",
        # have `logsLink` URL pointed to NodeManager even for completed jobs
        logs_path = '/node/containerlogs/'

        # Redirect to the history server instead of the (now gone) NodeManager.
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        root_url = yarn_cluster.HISTORY_SERVER_API_URL.get()

        tracking_path = log_link.split(logs_path)[1]
        container_id, user = tracking_path.strip('/').split('/')

        node_url = self.nodeId
        attempt_id = self.id
      else:
        logs_path = '/jobhistory/logs/'
        root_url, tracking_path = log_link.split(logs_path)
        node_url, container_id, attempt_id, user = tracking_path.strip('/').split('/')

        # Replace log path tokens with actual attempt properties if available
        if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
          # Keep the host from nodeHttpAddress but the port from nodeId.
          node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1])
        container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id
        attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id

      log_link = '%(root_url)s/jobhistory/logs/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
        'root_url': root_url,
        'node': node_url,
        'container': container_id,
        'attempt': attempt_id,
        'user': user
      }

    return log_link, user
Exemple #10
0
def get_mapreduce_api():
    """Lazily build and cache a MapreduceApi (double-checked locking)."""
    global _api_cache
    if _api_cache is None:
        with _api_cache_lock:
            # Re-check under the lock: another thread may have won the race.
            if _api_cache is None:
                yarn_cluster = cluster.get_cluster_conf_for_job_submission()
                _api_cache = MapreduceApi(yarn_cluster.PROXY_API_URL.get())
    return _api_cache
Exemple #11
0
def get_history_server_api():
  """Lazily build and cache a HistoryServerApi (double-checked locking)."""
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        _api_cache = HistoryServerApi(conf.HISTORY_SERVER_API_URL.get(), conf.SECURITY_ENABLED.get())
  return _api_cache
Exemple #12
0
def get_resource_manager():
  """Lazily build and cache a ResourceManagerApi (double-checked locking)."""
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        _api_cache = ResourceManagerApi(conf.RESOURCE_MANAGER_API_URL.get(), conf.SECURITY_ENABLED.get())
  return _api_cache
Exemple #13
0
def get_mapreduce_api():
  """Lazily build and cache a MapreduceApi with security/SSL settings."""
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        _api_cache = MapreduceApi(conf.PROXY_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
  return _api_cache
Exemple #14
0
def get_history_server_api():
    """Lazily build and cache a HistoryServerApi (double-checked locking)."""
    global _api_cache
    if _api_cache is None:
        with _api_cache_lock:
            # Re-check under the lock in case another thread initialized it.
            if _api_cache is None:
                yarn_cluster = cluster.get_cluster_conf_for_job_submission()
                _api_cache = HistoryServerApi(
                    yarn_cluster.HISTORY_SERVER_API_URL.get())
    return _api_cache
Exemple #15
0
def config_validator(user):
    """Warn when security is on but SQOOP_CONF_DIR does not exist on disk."""
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()

    res = []
    missing_conf_dir = not os.path.exists(SQOOP_CONF_DIR.get())
    if yarn_cluster.SECURITY_ENABLED.get() and missing_conf_dir:
        res.append(
            (NICE_NAME, _t("The app won't work without a valid %s property.") %
             SQOOP_CONF_DIR.grab_key))
    return res
Exemple #16
0
def get_mapreduce_api(user):
  """Lazily build and cache a per-user MapreduceApi; None if no YARN conf."""
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        # No YARN cluster configured: leave the cache empty (returns None).
        if conf is not None:
          _api_cache = MapreduceApi(user, conf.PROXY_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
  return _api_cache
Exemple #17
0
def get_resource_manager(username):
  """Lazily build and cache a per-user ResourceManagerApi.

  Raises PopupException when no YARN cluster is configured.
  """
  global API_CACHE
  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Resource Manager are available.'))
        API_CACHE = ResourceManagerApi(username, conf.RESOURCE_MANAGER_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
  return API_CACHE
Exemple #18
0
def get_resource_manager():
  """Lazily build and cache a ResourceManagerApi.

  Raises PopupException when no YARN cluster is configured.
  """
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Resource Manager are available.'))
        _api_cache = ResourceManagerApi(conf.RESOURCE_MANAGER_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
  return _api_cache
Exemple #19
0
def get_resource_manager(user):
  """Lazily build and cache a per-user ResourceManagerApi.

  Raises PopupException when no YARN cluster is configured.
  """
  global _api_cache
  if _api_cache is None:
    with _api_cache_lock:
      # Re-check under the lock in case another thread initialized it.
      if _api_cache is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Resource Manager are available.'))
        _api_cache = ResourceManagerApi(user, conf.RESOURCE_MANAGER_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
  return _api_cache
def get_service_info(service):
    """Return {'url': ..., 'security_enabled': ...} for a known service name.

    Known services: solr, oozie, httpfs, rm, jhs, sparkhs (case-insensitive).
    Unknown or unconfigured services are logged and yield a dict without a
    usable 'url' (possibly empty) instead of raising.
    """
    service_info = {}
    svc = service.lower()
    if svc == 'solr':
        service_info['url'] = SOLR_URL.get()
        service_info['security_enabled'] = SOLR_SECURITY_ENABLED.get()
    if svc == 'oozie':
        service_info['url'] = OOZIE_URL.get()
        service_info['security_enabled'] = OOZIE_SECURITY_ENABLED.get()
    if svc == 'httpfs':
        hdfs_config = hdfs_conf.HDFS_CLUSTERS['default']
        service_info['url'] = hdfs_config.WEBHDFS_URL.get()
        service_info['security_enabled'] = hdfs_config.SECURITY_ENABLED.get()
    if svc == 'rm':
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        service_info['url'] = yarn_cluster.RESOURCE_MANAGER_API_URL.get()
        service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
    if svc == 'jhs':
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        service_info['url'] = yarn_cluster.HISTORY_SERVER_API_URL.get()
        service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
    if svc == 'sparkhs':
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        service_info['url'] = yarn_cluster.SPARK_HISTORY_SERVER_URL.get()
        service_info['security_enabled'] = \
            yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get()

    # Bug fix: the original read service_info['url'] unconditionally below,
    # raising KeyError for unknown services and AttributeError when the URL
    # was configured as None. Guard both cases and only then strip the slash.
    if 'url' not in service_info or service_info['url'] is None:
        logging.info("Hue does not have %s configured, cannot test %s" %
                     (service, service))
    elif service_info['url'].endswith('/'):
        service_info['url'] = service_info['url'][:-1]

    return service_info
def get_history_server_api():
  # TODO: Spark History Server does not yet support setuser, implement when it does
  """Lazily build and cache a SparkHistoryServerApi.

  Raises PopupException when no YARN cluster is configured.
  """
  global API_CACHE

  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Spark History Server is available.'))
        API_CACHE = SparkHistoryServerApi(conf.SPARK_HISTORY_SERVER_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())

  return API_CACHE
Exemple #22
0
def get_mapreduce_api(username):
  """Return the cached MapreduceApi, bound to ``username``.

  Raises PopupException when no YARN cluster is configured.
  """
  global API_CACHE
  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Resource Manager are available.'))
        API_CACHE = MapreduceApi(conf.PROXY_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())

  API_CACHE.setuser(username)  # Set the correct user

  return API_CACHE
Exemple #23
0
def get_history_server_api(username):
  """Return the cached HistoryServerApi, bound to ``username``.

  Raises PopupException when no YARN cluster is configured.
  """
  global API_CACHE
  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('YARN cluster is not available.'))
        API_CACHE = HistoryServerApi(conf.HISTORY_SERVER_API_URL.get(), conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())

  API_CACHE.setuser(username)  # Set the correct user

  return API_CACHE
def get_history_server_api():
  # TODO: Spark History Server does not yet support setuser, implement when it does
  """Lazily build and cache a SparkHistoryServerApi using the Spark-specific
  security flag. Raises PopupException when no YARN cluster is configured."""
  global API_CACHE

  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        conf = cluster.get_cluster_conf_for_job_submission()
        if conf is None:
          raise PopupException(_('No Spark History Server is available.'))
        API_CACHE = SparkHistoryServerApi(conf.SPARK_HISTORY_SERVER_URL.get(), conf.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())

  return API_CACHE
Exemple #25
0
def get_history_server_api(username):
  """Return the cached HistoryServerApi, bound to ``username``.

  Raises PopupException when no YARN cluster is configured.
  """
  global API_CACHE
  if API_CACHE is None:
    with API_CACHE_LOCK:
      # Re-check under the lock in case another thread initialized it.
      if API_CACHE is None:
        yarn = cluster.get_cluster_conf_for_job_submission()
        if yarn is None:
          raise PopupException(_('YARN cluster is not available.'))
        API_CACHE = HistoryServerApi(yarn.HISTORY_SERVER_API_URL.get(), yarn.SECURITY_ENABLED.get(), yarn.SSL_CERT_CA_VERIFY.get())

  API_CACHE.setuser(username)  # Set the correct user

  return API_CACHE
Exemple #26
0
def get_metastore():
    """
  get_metastore() -> (is_local, host, port, kerberos_principal)

  Look at both hive-site.xml and beeswax.conf, and return the metastore information.

  hive-site.xml supersedes beeswax.conf.
  - If hive-site says local metastore (default), then get host & port from beeswax.conf.
  - If hive-site says remote, then use the URI specified there, so that we don't need to
    configure things twice.
  """
    global _METASTORE_LOC_CACHE
    if not _METASTORE_LOC_CACHE:
        # An empty/absent metastore URI in hive-site means a local metastore.
        thrift_uris = get_conf().get(_CNF_METASTORE_URIS)
        is_local = thrift_uris is None or thrift_uris == ''

        if is_local:
            cluster_conf = cluster.get_cluster_conf_for_job_submission()
            use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get(
            )
            host = beeswax.conf.BEESWAX_META_SERVER_HOST.get()
            port = beeswax.conf.BEESWAX_META_SERVER_PORT.get()
            kerberos_principal = security_util.get_kerberos_principal(
                KERBEROS.HUE_PRINCIPAL.get(), socket.getfqdn())
        else:
            use_sasl = str(get_conf().get(_CNF_METASTORE_SASL,
                                          'false')).lower() == 'true'
            # Only the first URI of a comma-separated list is used.
            thrift_uri = thrift_uris.split(",")[0]
            host, port = 'undefined', '0'
            match = _THRIFT_URI_RE.match(thrift_uri)
            if not match:
                LOG.fatal('Cannot understand remote metastore uri "%s"' %
                          thrift_uri)
            else:
                host, port = match.groups()
            kerberos_principal = security_util.get_kerberos_principal(
                get_conf().get(_CNF_METASTORE_KERBEROS_PRINCIPAL, None),
                socket.getfqdn())

        # With SASL and a full 3-part principal, the principal's host part
        # overrides the configured host.
        kerberos_principal_components = security_util.get_components(
            kerberos_principal)
        if use_sasl and len(kerberos_principal_components) == 3:
            host = kerberos_principal_components[1]

        _METASTORE_LOC_CACHE = (is_local, host, int(port), kerberos_principal)
    return _METASTORE_LOC_CACHE
def get_mapreduce_api(username):
    """Return the cached MapreduceApi, bound to ``username``.

    Raises PopupException when no YARN cluster is configured.
    """
    global API_CACHE
    if API_CACHE is None:
        with API_CACHE_LOCK:
            # Re-check under the lock in case another thread initialized it.
            if API_CACHE is None:
                conf = cluster.get_cluster_conf_for_job_submission()
                if conf is None:
                    raise PopupException(
                        _('No Resource Manager are available.'))
                API_CACHE = MapreduceApi(conf.PROXY_API_URL.get(),
                                         conf.SECURITY_ENABLED.get(),
                                         conf.SSL_CERT_CA_VERIFY.get())

    API_CACHE.setuser(username)  # Set the correct user

    return API_CACHE
Exemple #28
0
  def _resolve_tracking_url(self):
    """Resolve self.trackingUrl through any HTTP redirects and update it.

    Best effort: on any failure the original trackingUrl is left untouched
    and a warning is logged.
    """
    resp = None  # NOTE(review): unused local; kept unchanged (doc-only edit)
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      # NOTE(review): passes the bound method itself (not its result);
      # presumably _execute invokes the callable — confirm, since a sibling
      # variant calls resolve_redirect_url() before passing it.
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Normalize URLs that end in a '/jobs' path segment.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
Exemple #29
0
  def _resolve_tracking_url(self):
    """Resolve self.trackingUrl through any HTTP redirects and update it.

    Best effort: on any failure the original trackingUrl is left untouched
    and a warning is logged.
    """
    resp = None  # NOTE(review): unused local; kept unchanged (doc-only edit)
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      # Calls resolve_redirect_url() and passes its result to _execute.
      actual_url = self._execute(self._root.resolve_redirect_url())

      # Normalize URLs that end in a '/jobs' path segment.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
Exemple #30
0
  def _parse_configs(self):
    """Discover configured shell types and cache their commands/environments.

    Populates self._command_by_short_name and self._env_by_short_name for
    shells whose executable exists; records every configured shell (with an
    availability flag) in self.shell_types.
    """
    mr_cluster = get_cluster_conf_for_job_submission()
    mapred_home = mr_cluster.HADOOP_MAPRED_HOME.get() if mr_cluster is not None else None

    # One entry per shell type: nice name (e.g. "Python Shell") and short name (e.g. "python").
    shell_types = []
    for short_name in shell.conf.SHELL_TYPES.keys():
      type_conf = shell.conf.SHELL_TYPES[short_name]
      env_for_shell = { constants.HADOOP_MAPRED_HOME: mapred_home }
      command = type_conf.command.get().strip().split()
      nice_name = type_conf.nice_name.get().strip()
      available = utils.executable_exists(command)
      if available:
        self._command_by_short_name[short_name] = command
        conf_shell_env = type_conf.environment
        for env_variable in conf_shell_env.keys():
          env_for_shell[env_variable] = conf_shell_env[env_variable].value.get()
        self._env_by_short_name[short_name] = env_for_shell
      shell_types.append({ constants.NICE_NAME: nice_name, constants.KEY_NAME: short_name, constants.EXISTS: available })
    self.shell_types = shell_types
Exemple #31
0
def get_metastore():
  """
  get_metastore() -> (is_local, host, port, kerberos_principal)

  Look at both hive-site.xml and beeswax.conf, and return the metastore information.

  hive-site.xml supersedes beeswax.conf.
  - If hive-site says local metastore (default), then get host & port from beeswax.conf.
  - If hive-site says remote, then use the URI specified there, so that we don't need to
    configure things twice.
  """
  global _METASTORE_LOC_CACHE
  if not _METASTORE_LOC_CACHE:
    # An empty/absent metastore URI in hive-site means a local metastore.
    thrift_uris = get_conf().get(_CNF_METASTORE_URIS)
    is_local = thrift_uris is None or thrift_uris == ''

    if is_local:
      cluster_conf = cluster.get_cluster_conf_for_job_submission()
      use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
      host = beeswax.conf.BEESWAX_META_SERVER_HOST.get()
      port = beeswax.conf.BEESWAX_META_SERVER_PORT.get()
      kerberos_principal = security_util.get_kerberos_principal(KERBEROS.HUE_PRINCIPAL.get(), socket.getfqdn())
    else:
      use_sasl = str(get_conf().get(_CNF_METASTORE_SASL, 'false')).lower() == 'true'
      # Only the first URI of a comma-separated list is used.
      thrift_uri = thrift_uris.split(",")[0]
      host, port = 'undefined', '0'
      match = _THRIFT_URI_RE.match(thrift_uri)
      if not match:
        LOG.fatal('Cannot understand remote metastore uri "%s"' % thrift_uri)
      else:
        host, port = match.groups()
      kerberos_principal = security_util.get_kerberos_principal(get_conf().get(_CNF_METASTORE_KERBEROS_PRINCIPAL, None), socket.getfqdn())

    # With SASL and a full 3-part principal, the principal's host part
    # overrides the configured host.
    kerberos_principal_components = security_util.get_components(kerberos_principal)
    if use_sasl and len(kerberos_principal_components) == 3:
      host = kerberos_principal_components[1]

    _METASTORE_LOC_CACHE = (is_local, host, int(port), kerberos_principal)
  return _METASTORE_LOC_CACHE
Exemple #32
0
def is_enabled():
  """Return True when Kerberos applies: non-local host and secured cluster."""
  from hadoop import cluster # Avoid dependencies conflicts
  # Bind the conf object to its own name instead of rebinding `cluster`,
  # which shadowed the module imported on the previous line.
  yarn_cluster = cluster.get_cluster_conf_for_job_submission()

  return HOSTNAME.get() != 'localhost' and yarn_cluster.SECURITY_ENABLED.get()
Exemple #33
0
    def get_task_log(self, offset=0):
        """Fetch stdout/stderr/syslog for this task attempt.

        Rewrites the attempt's raw `logsLink` into a fetchable URL
        (NodeManager-style for running jobs, JobHistory-style for completed
        ones), then downloads and scrapes each of the three log streams.

        NOTE(review): `logs` is built but no return statement is visible in
        this block — possibly truncated; confirm against the full source.
        """
        logs = []
        attempt = self.task.job.job_attempts['jobAttempt'][-1]
        log_link = attempt['logsLink']

        # Generate actual task log link from logsLink url
        if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
            logs_path = '/node/containerlogs/'
            node_url, tracking_path = log_link.split(logs_path)
            container_id, user = tracking_path.strip('/').split('/')

            # Replace log path tokens with actual container properties if available
            if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                node_url = '%s://%s' % (node_url.split('://')[0],
                                        self.nodeHttpAddress)
            container_id = self.assignedContainerId if hasattr(
                self, 'assignedContainerId') else container_id

            log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
                'node_url': node_url,
                'logs_path': logs_path.strip('/'),
                'container': container_id,
                'user': user
            }
        else:  # Completed jobs
            if '/node/containerlogs/' in log_link:
                # Applications that use NodeManager API instead of JobHistory API, like new "Oozie Launcher",
                # have `logsLink` URL pointed to NodeManager even for completed jobs
                logs_path = '/node/containerlogs/'

                # Redirect to the history server: the NodeManager no longer
                # serves logs for completed jobs.
                yarn_cluster = cluster.get_cluster_conf_for_job_submission()
                root_url = yarn_cluster.HISTORY_SERVER_API_URL.get()

                tracking_path = log_link.split(logs_path)[1]
                container_id, user = tracking_path.strip('/').split('/')

                node_url = self.nodeId
                attempt_id = self.id
            else:
                logs_path = '/jobhistory/logs/'
                root_url, tracking_path = log_link.split(logs_path)
                node_url, container_id, attempt_id, user = tracking_path.strip(
                    '/').split('/')

                # Replace log path tokens with actual attempt properties if available
                if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                    # Keep the host from nodeHttpAddress but the port from nodeId.
                    node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0],
                                          attempt['nodeId'].split(':')[1])
                container_id = self.assignedContainerId if hasattr(
                    self, 'assignedContainerId') else container_id
                attempt_id = self.attemptId if hasattr(
                    self, 'attemptId') else attempt_id

            log_link = '%(root_url)s/jobhistory/logs/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
                'root_url': root_url,
                'node': node_url,
                'container': container_id,
                'attempt': attempt_id,
                'user': user
            }

        for name in ('stdout', 'stderr', 'syslog'):
            link = '/%s/' % name
            if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED':  # Yarn currently dumps with 500 error with doas in running state
                params = {}
            else:
                params = {'doAs': user}

            if int(offset) != 0:
                params['start'] = offset
            else:
                params['start'] = 0

            response = None
            try:
                # Substitute this attempt's id into the job portion of the URL.
                log_link = re.sub('job_[^/]+', str(self.id), log_link)
                root = Resource(get_log_client(log_link),
                                urlparse.urlsplit(log_link)[2],
                                urlencode=False)
                response = root.get(link, params=params)
                # Scrape the log text out of the second table cell of the page.
                log = html.fromstring(
                    response, parser=html.HTMLParser()).xpath(
                        '/html/body/table/tbody/tr/td[2]')[0].text_content()
            except Exception, e:
                log = _('Failed to retrieve log: %s' % e)
                try:
                    debug_info = '\nLog Link: %s' % log_link
                    if response:
                        debug_info += '\nHTML Response: %s' % response
                    LOG.error(debug_info)
                except:
                    LOG.exception('failed to build debug info')

            logs.append(log)
def get_node_manager_api(api_url):
    """Build a NodeManagerApi for *api_url* using the job-submission cluster's
    security and SSL-verification settings."""
    conf = cluster.get_cluster_conf_for_job_submission()
    security_enabled = conf.SECURITY_ENABLED.get()
    ssl_cert_ca_verify = conf.SSL_CERT_CA_VERIFY.get()
    return NodeManagerApi(api_url, security_enabled, ssl_cert_ca_verify)
Exemple #35
0
def get_security_default():
  """Return the cluster's default security flag (``SECURITY_ENABLED``).

  Returns:
    The value of ``SECURITY_ENABLED`` from the Hadoop job-submission
    cluster configuration.
  """
  from hadoop import cluster  # Imported locally to avoid dependency conflicts

  # Use a distinct name for the config object so it does not shadow the
  # `cluster` module imported just above (the original rebound `cluster`,
  # making any later use of the module in this scope impossible).
  yarn_cluster = cluster.get_cluster_conf_for_job_submission()
  return yarn_cluster.SECURITY_ENABLED.get()
Exemple #36
0
def get_resource_manager_api(api_url):
    """Build a ResourceManagerApi for *api_url*, honoring the cluster's
    SECURITY_ENABLED setting."""
    yarn_conf = cluster.get_cluster_conf_for_job_submission()
    security_enabled = yarn_conf.SECURITY_ENABLED.get()
    return ResourceManagerApi(api_url, security_enabled)
Exemple #37
0
def get_node_manager_api(api_url):
  """Return a NodeManagerApi configured from the job-submission cluster
  (security flag and SSL CA verification)."""
  conf = cluster.get_cluster_conf_for_job_submission()
  return NodeManagerApi(
      api_url, conf.SECURITY_ENABLED.get(), conf.SSL_CERT_CA_VERIFY.get())
Exemple #38
0
def get_resource_manager_api(api_url):
  """Return a ResourceManagerApi whose security flag comes from the
  job-submission cluster configuration."""
  security_enabled = cluster.get_cluster_conf_for_job_submission().SECURITY_ENABLED.get()
  return ResourceManagerApi(api_url, security_enabled)
Exemple #39
0
    def get_log_link(self):
        """Turn the attempt's raw `logsLink` into a browsable task-log URL.

        Returns:
            (log_link, user) — the rewritten log URL and the user extracted
            from the link path; (log_link, None) when `logsLink` is empty.
        """
        # Use the most recent job attempt as the source of log metadata.
        attempt = self.task.job.job_attempts['jobAttempt'][-1]
        log_link = attempt['logsLink']
        if not log_link:
            return log_link, None

        # Generate actual task log link from logsLink url
        if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
            # Running jobs: logsLink points at the NodeManager and has the
            # shape <scheme>://<host>/node/containerlogs/<container>/<user>.
            logs_path = '/node/containerlogs/'
            node_url, tracking_path = log_link.split(logs_path)
            container_id, user = tracking_path.strip('/').split('/')

            # Replace log path tokens with actual container properties if available
            if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                # Keep the original scheme, but target this attempt's node.
                node_url = '%s://%s' % (node_url.split('://')[0],
                                        self.nodeHttpAddress)
            container_id = self.assignedContainerId if hasattr(
                self, 'assignedContainerId') else container_id

            log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
                'node_url': node_url,
                'logs_path': logs_path.strip('/'),
                'container': container_id,
                'user': user
            }
        else:  # Completed jobs
            if '/node/containerlogs/' in log_link:
                # Applications that use NodeManager API instead of JobHistory API, like new "Oozie Launcher",
                # have `logsLink` URL pointed to NodeManager even for completed jobs
                logs_path = '/node/containerlogs/'

                # Redirect finished jobs to the configured JobHistory server.
                yarn_cluster = cluster.get_cluster_conf_for_job_submission()
                root_url = yarn_cluster.HISTORY_SERVER_API_URL.get()

                tracking_path = log_link.split(logs_path)[1]
                container_id, user = tracking_path.strip('/').split('/')

                node_url = self.nodeId
                attempt_id = self.id
            else:
                # Standard case: logsLink already targets the JobHistory server:
                # <root_url>/jobhistory/logs/<node>/<container>/<attempt>/<user>.
                logs_path = '/jobhistory/logs/'
                root_url, tracking_path = log_link.split(logs_path)
                node_url, container_id, attempt_id, user = tracking_path.strip(
                    '/').split('/')

                # Replace log path tokens with actual attempt properties if available
                if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
                    # Host from this attempt's address, port from the
                    # attempt's recorded nodeId ("host:port" on both sides).
                    node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0],
                                          attempt['nodeId'].split(':')[1])
                container_id = self.assignedContainerId if hasattr(
                    self, 'assignedContainerId') else container_id
                attempt_id = self.attemptId if hasattr(
                    self, 'attemptId') else attempt_id

            # Reassemble the history-server log URL from the resolved pieces.
            log_link = '%(root_url)s/jobhistory/logs/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
                'root_url': root_url,
                'node': node_url,
                'container': container_id,
                'attempt': attempt_id,
                'user': user
            }

        return log_link, user