def _parse_configs(self):
  mr_cluster = get_cluster_conf_for_job_submission()
  if mr_cluster is None:
    mapred_home = None
  else:
    mapred_home = mr_cluster.HADOOP_MAPRED_HOME.get()

  shell_types = [] # List of available shell types. For each shell type, we have a nice name (e.g. "Python Shell") and a short name (e.g. "python")
  for item in shell.conf.SHELL_TYPES.keys():
    if mapred_home:
      env_for_shell = {constants.HADOOP_MAPRED_HOME: mapred_home}
    else:
      env_for_shell = {}
    command = shell.conf.SHELL_TYPES[item].command.get().strip().split()
    nice_name = shell.conf.SHELL_TYPES[item].nice_name.get().strip()
    executable_exists = utils.executable_exists(command)
    if executable_exists:
      self._command_by_short_name[item] = command
      conf_shell_env = shell.conf.SHELL_TYPES[item].environment
      for env_variable in conf_shell_env.keys():
        env_for_shell[env_variable] = conf_shell_env[env_variable].value.get()
      self._env_by_short_name[item] = env_for_shell
    shell_types.append({constants.NICE_NAME: nice_name, constants.KEY_NAME: item, constants.EXISTS: executable_exists})
  self.shell_types = shell_types
def get_log_client(log_link):
  global _log_client_heap
  global MAX_HEAP_SIZE

  _log_client_lock.acquire()
  try:
    components = urlparse.urlsplit(log_link)
    base_url = '%(scheme)s://%(netloc)s' % {'scheme': components[0], 'netloc': components[1]}
    # Entries take the form (epoch time, client object), managed as a
    # Least Recently Used cache.
    client_tuple = next((tup for tup in _log_client_heap if tup[1].base_url == base_url), None)
    if client_tuple is None:
      client = HttpClient(base_url, LOG)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      if yarn_cluster.SECURITY_ENABLED.get():
        client.set_kerberos_auth()
    else:
      _log_client_heap.remove(client_tuple)
      client = client_tuple[1]

    new_client_tuple = (time.time(), client)
    if len(_log_client_heap) >= MAX_HEAP_SIZE:
      heapq.heapreplace(_log_client_heap, new_client_tuple)
    else:
      heapq.heappush(_log_client_heap, new_client_tuple)

    return client
  finally:
    _log_client_lock.release()
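# A standalone sketch (assumption, not from the source) of the heap-based LRU
# eviction that get_log_client() relies on: the heap orders entries by
# timestamp, so heapq.heapreplace() always evicts the oldest one. A counter is
# added here as a tie-breaker so equal timestamps never fall through to
# comparing the client objects themselves.
import heapq
import itertools
import time

_counter = itertools.count()

def touch_client(heap, client, max_size=10):
  # Re-inserting with a fresh timestamp moves the client to the "young" end.
  entry = (time.time(), next(_counter), client)
  if len(heap) >= max_size:
    heapq.heapreplace(heap, entry)  # pops the least recently used entry, pushes the fresh one
  else:
    heapq.heappush(heap, entry)
  return client

# Usage: touch_client(cache_heap, my_client) refreshes my_client's position.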
def get_security(cls, query_server):
  principal = query_server['principal']
  impersonation_enabled = False

  if query_server['server_name'] == 'impala':
    cluster_conf = cluster.get_cluster_conf_for_job_submission()
    use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
    mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
    impersonation_enabled = query_server['impersonation_enabled']
  else:
    hive_mechanism = hive_site.get_hiveserver2_authentication()
    if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
      raise Exception(_('%s server authentication not supported. Valid are %s.') % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys()))
    use_sasl = hive_mechanism in ('KERBEROS', 'NONE')
    mechanism = 'NOSASL'
    if use_sasl:
      mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
    impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

  if principal:
    kerberos_principal_short_name = principal.split('/', 1)[0]
  else:
    kerberos_principal_short_name = None

  return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled
def get_security(self):
  principal = self.query_server['principal']
  impersonation_enabled = False

  if principal:
    kerberos_principal_short_name = principal.split('/', 1)[0]
  else:
    kerberos_principal_short_name = None

  if self.query_server['server_name'] == 'impala':
    cluster_conf = cluster.get_cluster_conf_for_job_submission()
    use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
    mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
    impersonation_enabled = self.query_server['impersonation_enabled']
  else:
    hive_mechanism = hive_site.get_hiveserver2_authentication()
    if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
      raise Exception(_('%s server authentication not supported. Valid are %s.') % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys()))
    use_sasl = hive_mechanism in ('KERBEROS', 'NONE')
    mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
    impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

  return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled
def get_security(self):
  principal = self.query_server['principal']
  impersonation_enabled = False
  ldap_username = None
  ldap_password = get_ldap_password()

  if ldap_password is not None: # Pass-through LDAP authentication
    ldap_username = LDAP_USERNAME.get()

  if principal:
    kerberos_principal_short_name = principal.split('/', 1)[0]
  else:
    kerberos_principal_short_name = None

  if self.query_server['server_name'] == 'impala':
    if ldap_password: # Force LDAP auth if ldap_password is provided
      use_sasl = True
      mechanism = HiveServerClient.HS2_MECHANISMS['NONE']
    else:
      cluster_conf = cluster.get_cluster_conf_for_job_submission()
      use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()
      mechanism = HiveServerClient.HS2_MECHANISMS['KERBEROS']
    impersonation_enabled = self.query_server['impersonation_enabled']
  else:
    hive_mechanism = hive_site.get_hiveserver2_authentication()
    if hive_mechanism not in HiveServerClient.HS2_MECHANISMS:
      raise Exception(_('%s server authentication not supported. Valid are %s.') % (hive_mechanism, HiveServerClient.HS2_MECHANISMS.keys()))
    use_sasl = hive_mechanism in ('KERBEROS', 'NONE', 'LDAP')
    mechanism = HiveServerClient.HS2_MECHANISMS[hive_mechanism]
    impersonation_enabled = hive_site.hiveserver2_impersonation_enabled()

  return use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, ldap_username, ldap_password
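# HS2_MECHANISMS is referenced by every get_security() variant above but never
# defined in this section. A plausible sketch, mapping HiveServer2
# authentication names to SASL mechanism names (the exact contents in the
# source may differ):
HS2_MECHANISMS = {
  'KERBEROS': 'GSSAPI',  # SASL/GSSAPI for Kerberos
  'NONE': 'PLAIN',       # SASL/PLAIN with no real credential check
  'NOSASL': 'NOSASL',    # raw transport, SASL skipped entirely
  'LDAP': 'PLAIN',       # LDAP credentials carried over SASL/PLAIN
}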
def config_validator(user):
  res = []

  yarn_cluster = cluster.get_cluster_conf_for_job_submission()
  if yarn_cluster.SECURITY_ENABLED.get() and not os.path.exists(SQOOP_CONF_DIR.get()):
    res.append((NICE_NAME, _t("The app won't work without a valid %s property.") % SQOOP_CONF_DIR.grab_key))

  return res
def get_log_link(self):
  attempt = self.task.job.job_attempts['jobAttempt'][-1]
  log_link = attempt['logsLink']
  if not log_link:
    return log_link, None

  # Generate actual task log link from logsLink url
  if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
    logs_path = '/node/containerlogs/'
    node_url, tracking_path = log_link.split(logs_path)
    container_id, user = tracking_path.strip('/').split('/')

    # Replace log path tokens with actual container properties if available
    if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
      node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress)
    container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id

    log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
      'node_url': node_url,
      'logs_path': logs_path.strip('/'),
      'container': container_id,
      'user': user
    }
  else:  # Completed jobs
    if '/node/containerlogs/' in log_link:
      # Applications that use NodeManager API instead of JobHistory API, like new "Oozie Launcher",
      # have `logsLink` URL pointed to NodeManager even for completed jobs
      logs_path = '/node/containerlogs/'
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      root_url = yarn_cluster.HISTORY_SERVER_API_URL.get()

      tracking_path = log_link.split(logs_path)[1]
      container_id, user = tracking_path.strip('/').split('/')

      node_url = self.nodeId
      attempt_id = self.id
    else:
      logs_path = '/jobhistory/logs/'
      root_url, tracking_path = log_link.split(logs_path)
      node_url, container_id, attempt_id, user = tracking_path.strip('/').split('/')

      # Replace log path tokens with actual attempt properties if available
      if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
        node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1])
      container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id
      attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id

    log_link = '%(root_url)s/jobhistory/logs/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
      'root_url': root_url,
      'node': node_url,
      'container': container_id,
      'attempt': attempt_id,
      'user': user
    }

  return log_link, user
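# Worked example (hypothetical URL, for illustration only) of the running-job
# branch in get_log_link(): splitting a NodeManager `logsLink` into its parts.
log_link = 'http://nm-host:8042/node/containerlogs/container_1408_0001_01_000002/hue'
node_url, tracking_path = log_link.split('/node/containerlogs/')
container_id, user = tracking_path.strip('/').split('/')
assert node_url == 'http://nm-host:8042'
assert container_id == 'container_1408_0001_01_000002'
assert user == 'hue'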
def get_mapreduce_api():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        _api_cache = MapreduceApi(yarn_cluster.PROXY_API_URL.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_history_server_api():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        _api_cache = HistoryServerApi(yarn_cluster.HISTORY_SERVER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_resource_manager():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        _api_cache = ResourceManagerApi(yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_mapreduce_api():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        _api_cache = MapreduceApi(yarn_cluster.PROXY_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_history_server_api():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        _api_cache = HistoryServerApi(yarn_cluster.HISTORY_SERVER_API_URL.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_mapreduce_api(user):
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is not None:
          _api_cache = MapreduceApi(user, yarn_cluster.PROXY_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_resource_manager(username):
  global API_CACHE
  if API_CACHE is None:
    API_CACHE_LOCK.acquire()
    try:
      if API_CACHE is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Resource Manager is available.'))
        API_CACHE = ResourceManagerApi(username, yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      API_CACHE_LOCK.release()
  return API_CACHE
def get_resource_manager():
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Resource Manager is available.'))
        _api_cache = ResourceManagerApi(yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_resource_manager(user):
  global _api_cache
  if _api_cache is None:
    _api_cache_lock.acquire()
    try:
      if _api_cache is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Resource Manager is available.'))
        _api_cache = ResourceManagerApi(user, yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      _api_cache_lock.release()
  return _api_cache
def get_service_info(service):
  service_info = {}
  if service.lower() == 'solr':
    service_info['url'] = SOLR_URL.get()
    service_info['security_enabled'] = SOLR_SECURITY_ENABLED.get()
  if service.lower() == 'oozie':
    service_info['url'] = OOZIE_URL.get()
    service_info['security_enabled'] = OOZIE_SECURITY_ENABLED.get()
  if service.lower() == 'httpfs':
    hdfs_config = hdfs_conf.HDFS_CLUSTERS['default']
    service_info['url'] = hdfs_config.WEBHDFS_URL.get()
    service_info['security_enabled'] = hdfs_config.SECURITY_ENABLED.get()
  if service.lower() == 'rm':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.RESOURCE_MANAGER_API_URL.get()
    service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
  if service.lower() == 'jhs':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.HISTORY_SERVER_API_URL.get()
    service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
  if service.lower() == 'sparkhs':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.SPARK_HISTORY_SERVER_URL.get()
    service_info['security_enabled'] = yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get()

  if 'url' not in service_info or service_info['url'] is None:
    logging.info("Hue does not have %s configured, cannot test %s" % (service, service))
  elif service_info['url'].endswith('/'):
    service_info['url'] = service_info['url'][:-1]

  return service_info
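# Hedged usage sketch of get_service_info(): each configured service yields a
# 'url' and 'security_enabled' pair; unknown or unconfigured services come
# back empty (and are logged).
for svc in ('solr', 'rm', 'jhs', 'sparkhs'):
  info = get_service_info(svc)
  print('%s -> url=%s, security=%s' % (svc, info.get('url'), info.get('security_enabled')))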
def get_history_server_api():
  # TODO: Spark History Server does not yet support setuser, implement when it does
  global API_CACHE
  if API_CACHE is None:
    API_CACHE_LOCK.acquire()
    try:
      if API_CACHE is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Spark History Server is available.'))
        API_CACHE = SparkHistoryServerApi(yarn_cluster.SPARK_HISTORY_SERVER_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      API_CACHE_LOCK.release()
  return API_CACHE
def get_mapreduce_api(username):
  global API_CACHE
  if API_CACHE is None:
    API_CACHE_LOCK.acquire()
    try:
      if API_CACHE is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Resource Manager is available.'))
        API_CACHE = MapreduceApi(yarn_cluster.PROXY_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      API_CACHE_LOCK.release()

  API_CACHE.setuser(username) # Set the correct user

  return API_CACHE
def get_history_server_api(username):
  global API_CACHE
  if API_CACHE is None:
    API_CACHE_LOCK.acquire()
    try:
      if API_CACHE is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('YARN cluster is not available.'))
        API_CACHE = HistoryServerApi(yarn_cluster.HISTORY_SERVER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      API_CACHE_LOCK.release()

  API_CACHE.setuser(username) # Set the correct user

  return API_CACHE
def get_history_server_api():
  # TODO: Spark History Server does not yet support setuser, implement when it does
  global API_CACHE
  if API_CACHE is None:
    API_CACHE_LOCK.acquire()
    try:
      if API_CACHE is None:
        yarn_cluster = cluster.get_cluster_conf_for_job_submission()
        if yarn_cluster is None:
          raise PopupException(_('No Spark History Server is available.'))
        API_CACHE = SparkHistoryServerApi(yarn_cluster.SPARK_HISTORY_SERVER_URL.get(), yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
    finally:
      API_CACHE_LOCK.release()
  return API_CACHE
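# Hedged sketch (not from the source) of the lazy-singleton pattern shared by
# all the get_*_api() helpers above: check, lock, re-check, construct. The
# second check inside the lock prevents two racing threads from both building
# the API object. Note that the setuser() variants above mutate the shared
# singleton outside the lock, so concurrent callers can still overwrite each
# other's user; that is a known limitation of this pattern.
import threading

_API_CACHE = None
_API_CACHE_LOCK = threading.Lock()

def get_api(factory):
  global _API_CACHE
  if _API_CACHE is None:
    with _API_CACHE_LOCK:
      if _API_CACHE is None:  # re-check: another thread may have won the race
        _API_CACHE = factory()
  return _API_CACHE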
def get_metastore(): """ get_metastore() -> (is_local, host, port, kerberos_principal) Look at both hive-site.xml and beeswax.conf, and return the metastore information. hive-site.xml supersedes beeswax.conf. - If hive-site says local metastore (default), then get host & port from beeswax.conf. - If hive-site says remote, then use the URI specified there, so that we don't need to configure things twice. """ global _METASTORE_LOC_CACHE if not _METASTORE_LOC_CACHE: thrift_uris = get_conf().get(_CNF_METASTORE_URIS) is_local = thrift_uris is None or thrift_uris == '' if is_local: cluster_conf = cluster.get_cluster_conf_for_job_submission() use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get( ) host = beeswax.conf.BEESWAX_META_SERVER_HOST.get() port = beeswax.conf.BEESWAX_META_SERVER_PORT.get() kerberos_principal = security_util.get_kerberos_principal( KERBEROS.HUE_PRINCIPAL.get(), socket.getfqdn()) else: use_sasl = str(get_conf().get(_CNF_METASTORE_SASL, 'false')).lower() == 'true' thrift_uri = thrift_uris.split(",")[0] host, port = 'undefined', '0' match = _THRIFT_URI_RE.match(thrift_uri) if not match: LOG.fatal('Cannot understand remote metastore uri "%s"' % thrift_uri) else: host, port = match.groups() kerberos_principal = security_util.get_kerberos_principal( get_conf().get(_CNF_METASTORE_KERBEROS_PRINCIPAL, None), socket.getfqdn()) kerberos_principal_components = security_util.get_components( kerberos_principal) if use_sasl and len(kerberos_principal_components) == 3: host = kerberos_principal_components[1] _METASTORE_LOC_CACHE = (is_local, host, int(port), kerberos_principal) return _METASTORE_LOC_CACHE
def _resolve_tracking_url(self):
  try:
    self._client = HttpClient(self.trackingUrl, logger=LOG)
    self._root = Resource(self._client)
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
    actual_url = self._execute(self._root.resolve_redirect_url)

    if actual_url.strip('/').split('/')[-1] == 'jobs':
      actual_url = actual_url.strip('/').replace('jobs', '')
    self.trackingUrl = actual_url
    LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
  except Exception as e:
    LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
def _parse_configs(self):
  mr_cluster = get_cluster_conf_for_job_submission()
  if mr_cluster is None:
    mapred_home = None
  else:
    mapred_home = mr_cluster.HADOOP_MAPRED_HOME.get()

  shell_types = [] # List of available shell types. For each shell type, we have a nice name (e.g. "Python Shell") and a short name (e.g. "python")
  for item in shell.conf.SHELL_TYPES.keys():
    env_for_shell = { constants.HADOOP_MAPRED_HOME: mapred_home }
    command = shell.conf.SHELL_TYPES[item].command.get().strip().split()
    nice_name = shell.conf.SHELL_TYPES[item].nice_name.get().strip()
    executable_exists = utils.executable_exists(command)
    if executable_exists:
      self._command_by_short_name[item] = command
      conf_shell_env = shell.conf.SHELL_TYPES[item].environment
      for env_variable in conf_shell_env.keys():
        env_for_shell[env_variable] = conf_shell_env[env_variable].value.get()
      self._env_by_short_name[item] = env_for_shell
    shell_types.append({ constants.NICE_NAME: nice_name, constants.KEY_NAME: item, constants.EXISTS: executable_exists })
  self.shell_types = shell_types
def get_metastore(): """ get_metastore() -> (is_local, host, port, kerberos_principal) Look at both hive-site.xml and beeswax.conf, and return the metastore information. hive-site.xml supersedes beeswax.conf. - If hive-site says local metastore (default), then get host & port from beeswax.conf. - If hive-site says remote, then use the URI specified there, so that we don't need to configure things twice. """ global _METASTORE_LOC_CACHE if not _METASTORE_LOC_CACHE: thrift_uris = get_conf().get(_CNF_METASTORE_URIS) is_local = thrift_uris is None or thrift_uris == '' if is_local: cluster_conf = cluster.get_cluster_conf_for_job_submission() use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get() host = beeswax.conf.BEESWAX_META_SERVER_HOST.get() port = beeswax.conf.BEESWAX_META_SERVER_PORT.get() kerberos_principal = security_util.get_kerberos_principal(KERBEROS.HUE_PRINCIPAL.get(), socket.getfqdn()) else: use_sasl = str(get_conf().get(_CNF_METASTORE_SASL, 'false')).lower() == 'true' thrift_uri = thrift_uris.split(",")[0] host, port = 'undefined', '0' match = _THRIFT_URI_RE.match(thrift_uri) if not match: LOG.fatal('Cannot understand remote metastore uri "%s"' % thrift_uri) else: host, port = match.groups() kerberos_principal = security_util.get_kerberos_principal(get_conf().get(_CNF_METASTORE_KERBEROS_PRINCIPAL, None), socket.getfqdn()) kerberos_principal_components = security_util.get_components(kerberos_principal) if use_sasl and len(kerberos_principal_components) == 3: host = kerberos_principal_components[1] _METASTORE_LOC_CACHE = (is_local, host, int(port), kerberos_principal) return _METASTORE_LOC_CACHE
def is_enabled():
  from hadoop import cluster # Avoid dependencies conflicts

  cluster = cluster.get_cluster_conf_for_job_submission()

  return HOSTNAME.get() != 'localhost' and cluster.SECURITY_ENABLED.get()
def get_task_log(self, offset=0):
  logs = []
  attempt = self.task.job.job_attempts['jobAttempt'][-1]
  log_link = attempt['logsLink']

  # Generate actual task log link from logsLink url
  if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
    logs_path = '/node/containerlogs/'
    node_url, tracking_path = log_link.split(logs_path)
    container_id, user = tracking_path.strip('/').split('/')

    # Replace log path tokens with actual container properties if available
    if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
      node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress)
    container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id

    log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
      'node_url': node_url,
      'logs_path': logs_path.strip('/'),
      'container': container_id,
      'user': user
    }
  else:  # Completed jobs
    if '/node/containerlogs/' in log_link:
      # Applications that use NodeManager API instead of JobHistory API, like new "Oozie Launcher",
      # have `logsLink` URL pointed to NodeManager even for completed jobs
      logs_path = '/node/containerlogs/'
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      root_url = yarn_cluster.HISTORY_SERVER_API_URL.get()

      tracking_path = log_link.split(logs_path)[1]
      container_id, user = tracking_path.strip('/').split('/')

      node_url = self.nodeId
      attempt_id = self.id
    else:
      logs_path = '/jobhistory/logs/'
      root_url, tracking_path = log_link.split(logs_path)
      node_url, container_id, attempt_id, user = tracking_path.strip('/').split('/')

      # Replace log path tokens with actual attempt properties if available
      if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
        node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1])
      container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id
      attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id

    log_link = '%(root_url)s/jobhistory/logs/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
      'root_url': root_url,
      'node': node_url,
      'container': container_id,
      'attempt': attempt_id,
      'user': user
    }

  for name in ('stdout', 'stderr', 'syslog'):
    link = '/%s/' % name
    if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': # Yarn currently dumps with 500 error with doas in running state
      params = {}
    else:
      params = {'doAs': user}

    if int(offset) != 0:
      params['start'] = offset
    else:
      params['start'] = 0

    response = None
    try:
      log_link = re.sub('job_[^/]+', str(self.id), log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    except Exception as e:
      log = _('Failed to retrieve log: %s' % e)
      try:
        debug_info = '\nLog Link: %s' % log_link
        if response:
          debug_info += '\nHTML Response: %s' % response
        LOG.error(debug_info)
      except:
        LOG.exception('failed to build debug info')

    logs.append(log)

  return logs  # the collected stdout, stderr and syslog texts
def get_node_manager_api(api_url):
  yarn_cluster = cluster.get_cluster_conf_for_job_submission()
  return NodeManagerApi(api_url, yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
def get_security_default():
  '''Get default security value from Hadoop'''
  from hadoop import cluster # Avoid dependencies conflicts

  cluster = cluster.get_cluster_conf_for_job_submission()

  return cluster.SECURITY_ENABLED.get()
def get_resource_manager_api(api_url):
  return ResourceManagerApi(api_url, cluster.get_cluster_conf_for_job_submission().SECURITY_ENABLED.get())