def __init__(self, kernel_manager, proxy_config):
    """Initialise the proxy and build its YARN ``ResourceManager`` client.

    The endpoint and its security flag are taken from ``proxy_config`` when
    the key is present there; otherwise the values found on
    ``kernel_manager.parent.parent`` are used.

    :param kernel_manager: kernel manager owning this proxy; its
        ``shutdown_wait_time`` may be raised by this constructor.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)
    self.application_id = None

    fallback = kernel_manager.parent.parent
    self.yarn_endpoint = proxy_config.get('yarn_endpoint', fallback.yarn_endpoint)
    self.yarn_endpoint_security_enabled = proxy_config.get(
        'yarn_endpoint_security_enabled',
        fallback.yarn_endpoint_security_enabled)

    endpoint_url = urlparse(self.yarn_endpoint)
    rm_kwargs = {'address': endpoint_url.hostname, 'port': endpoint_url.port}
    # The kerberos flag is forwarded only when it is literally ``True``;
    # any other value leaves the client's own default in place.
    if self.yarn_endpoint_security_enabled is True:
        rm_kwargs['kerberos_enabled'] = self.yarn_endpoint_security_enabled
    self.resource_mgr = ResourceManager(**rm_kwargs)

    # YARN applications tend to need more than the default 5 second shutdown
    # wait.  Instead of requiring a command-line option, the wait is raised to
    # ``yarn_shutdown_wait_time`` (env-driven, defaulting to 15 seconds) -- but
    # only when the currently configured wait is shorter than that.
    if kernel_manager.shutdown_wait_time < yarn_shutdown_wait_time:
        kernel_manager.shutdown_wait_time = yarn_shutdown_wait_time
        self.log.debug(
            "{class_name} shutdown wait time adjusted to {wait_time} seconds.".format(
                class_name=type(self).__name__,
                wait_time=kernel_manager.shutdown_wait_time))
def __init__(self, kernel_manager, proxy_config):
    """Initialise the proxy and build its YARN ``ResourceManager`` client.

    ``proxy_config`` entries win over the values found on
    ``kernel_manager.parent.parent`` whenever the key is present.

    :param kernel_manager: kernel manager owning this proxy.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)
    self.application_id = None

    fallback = kernel_manager.parent.parent
    self.yarn_endpoint = proxy_config.get('yarn_endpoint', fallback.yarn_endpoint)
    self.yarn_endpoint_security_enabled = proxy_config.get(
        'yarn_endpoint_security_enabled',
        fallback.yarn_endpoint_security_enabled)

    # Only the host portion of the endpoint URL is handed to the client.
    rm_kwargs = {'address': urlparse(self.yarn_endpoint).hostname}
    # The kerberos flag is forwarded only when it is literally ``True``.
    if self.yarn_endpoint_security_enabled is True:
        rm_kwargs['kerberos_enabled'] = self.yarn_endpoint_security_enabled
    self.resource_mgr = ResourceManager(**rm_kwargs)
def _initialize_resource_manager(self, **kwargs):
    """Create the Hadoop YARN Resource Manager client for this kernel's lifecycle.

    The client is stored on ``self.resource_mgr`` and the endpoint it reports
    as active is stored on ``self.rm_addr``.

    :param kwargs: forwarded to ``KernelSessionManager.get_kernel_username``
        when the SimpleAuth path is taken.
    """
    endpoints = None
    primary = self.yarn_endpoint
    if primary:
        # The alternate endpoint is consulted only when a primary one is set.
        alternate = self.alt_yarn_endpoint
        endpoints = [primary, alternate] if alternate else [primary]

    auth = None
    if self.yarn_endpoint_security_enabled:
        from requests_kerberos import HTTPKerberosAuth

        auth = HTTPKerberosAuth()
    else:
        # When the installed yarn-api-client ships SimpleAuth, use it so that
        # requests against the YARN API keep working where anonymous access
        # is not allowed (anonymous access being the default).
        try:
            from yarn_api_client.auth import SimpleAuth

            kernel_username = KernelSessionManager.get_kernel_username(**kwargs)
            auth = SimpleAuth(kernel_username)
            self.log.debug(
                f"Using SimpleAuth with '{kernel_username}' against endpoints: {endpoints}"
            )
        except ImportError:
            # No SimpleAuth available: fall back to unauthenticated requests.
            auth = None

    self.resource_mgr = ResourceManager(
        service_endpoints=endpoints, auth=auth, verify=cert_path)
    self.rm_addr = self.resource_mgr.get_active_endpoint()
def __init__(self, **kwargs):
    """Initialise YARN bookkeeping state and the ``ResourceManager`` client.

    :param kwargs: passed straight through to the parent constructor.
    """
    super().__init__(**kwargs)

    # Nothing is known about the YARN application until it is launched.
    self.application_id = None
    self.last_known_state = None
    self.candidate_queue = None
    self.candidate_partition = None

    endpoints = None
    primary = self.yarn_endpoint
    if primary:
        # The alternate endpoint is consulted only when a primary one is set.
        alternate = self.alt_yarn_endpoint
        endpoints = [primary, alternate] if alternate else [primary]

    if self.yarn_endpoint_security_enabled:
        from requests_kerberos import HTTPKerberosAuth

        auth = HTTPKerberosAuth()
    else:
        auth = None

    self.resource_mgr = ResourceManager(
        service_endpoints=endpoints, auth=auth, verify=cert_path)
    self.rm_addr = self.resource_mgr.get_active_endpoint()

    # When the YARN resource check is enabled and resources are not available
    # right away, 20% of the launch timeout is spent waiting and retrying at
    # a fixed interval before the launch is declared not feasible.
    self.yarn_resource_check_wait_time = 0.20 * self.launch_timeout
def __init__(self, kernel_manager, proxy_config):
    """Initialise the proxy and point a ``ResourceManager`` at the YARN host.

    :param kernel_manager: kernel manager owning this proxy; supplies the
        fallback endpoint via ``kernel_manager.parent.parent.yarn_endpoint``.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)
    self.application_id = None

    fallback_endpoint = kernel_manager.parent.parent.yarn_endpoint
    self.yarn_endpoint = proxy_config.get('yarn_endpoint', fallback_endpoint)

    # Only the host portion of the endpoint URL is handed to the client.
    endpoint_url = urlparse(self.yarn_endpoint)
    self.resource_mgr = ResourceManager(address=endpoint_url.hostname)
def __init__(self, kernel_manager, proxy_config):
    """Initialise proxy state, the YARN client and YARN-specific timeouts.

    Each YARN setting is taken from ``proxy_config`` when the key is present
    there, and from the same-named attribute on ``kernel_manager`` otherwise.

    :param kernel_manager: kernel manager owning this proxy; its
        ``shutdown_wait_time`` may be raised by this constructor.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)

    # Nothing is known about the YARN application or its process yet.
    self.application_id = None
    self.last_known_state = None
    self.candidate_queue = None
    self.candidate_partition = None
    self.local_proc = None
    self.pid = None
    self.ip = None

    self.yarn_endpoint = proxy_config.get(
        'yarn_endpoint', kernel_manager.yarn_endpoint)
    self.alt_yarn_endpoint = proxy_config.get(
        'alt_yarn_endpoint', kernel_manager.alt_yarn_endpoint)
    self.yarn_endpoint_security_enabled = proxy_config.get(
        'yarn_endpoint_security_enabled',
        kernel_manager.yarn_endpoint_security_enabled)

    endpoints = None
    primary = self.yarn_endpoint
    if primary:
        # The alternate endpoint is consulted only when a primary one is set.
        alternate = self.alt_yarn_endpoint
        endpoints = [primary, alternate] if alternate else [primary]

    if self.yarn_endpoint_security_enabled:
        from requests_kerberos import HTTPKerberosAuth

        auth = HTTPKerberosAuth()
    else:
        auth = None

    self.resource_mgr = ResourceManager(
        service_endpoints=endpoints, auth=auth, verify=cert_path)
    self.rm_addr = self.resource_mgr.get_active_endpoint()

    # YARN applications tend to need more than the default 5 second shutdown
    # wait.  Instead of requiring a command-line option, the wait is raised to
    # ``yarn_shutdown_wait_time`` (env-driven, defaulting to 15 seconds) -- but
    # only when the currently configured wait is shorter than that.
    if kernel_manager.shutdown_wait_time < yarn_shutdown_wait_time:
        kernel_manager.shutdown_wait_time = yarn_shutdown_wait_time
        self.log.debug(
            "{class_name} shutdown wait time adjusted to {wait_time} seconds.".format(
                class_name=type(self).__name__,
                wait_time=kernel_manager.shutdown_wait_time))

    # When the YARN resource check is enabled and resources are not available
    # right away, 20% of kernel_launch_timeout is spent waiting and retrying
    # at a fixed interval before the launch is declared not feasible.
    self.yarn_resource_check_wait_time = 0.20 * self.kernel_launch_timeout
def setUpClass(self):
    """Configure the fixture from the ``YARN_ENDPOINT`` environment variable.

    ``self.configured`` is always set.  It becomes ``True`` -- and
    ``self.resourceManager`` is created -- only when the variable holds a URL
    from which both a hostname and a port can be parsed.
    """
    self.configured = False

    endpoint = os.getenv('YARN_ENDPOINT')
    if not endpoint:
        # Unset or empty: leave the fixture unconfigured.
        return

    endpoint_uri = urlparse(endpoint)
    host, port = endpoint_uri.hostname, endpoint_uri.port
    if host and port:
        self.configured = True
        self.resourceManager = ResourceManager(host, port)
def __init__(self, kernel_manager, lifecycle_config):
    """Initialise the lifecycle manager and its YARN ``ResourceManager`` client.

    :param kernel_manager: kernel manager owning this lifecycle manager; its
        ``provider_config`` supplies fallback settings and its
        ``shutdown_wait_time`` may be raised by this constructor.
    :param lifecycle_config: per-kernel settings, read via ``.get()``.
    """
    super(YarnKernelLifecycleManager, self).__init__(kernel_manager, lifecycle_config)
    self.application_id = None
    self.rm_addr = None

    # Per-kernel (lifecycle_config) values win whenever the key is present;
    # the provider-level configuration only supplies the fallback.  Setups
    # that want the local endpoint should leave these settings unset.
    # NOTE(review): assuming dict-like ``.get()``, a key that is present with
    # a None value is returned as None rather than falling back to the
    # provider-level value -- confirm this is the intended precedence.
    provider_config = kernel_manager.provider_config
    self.yarn_endpoint = lifecycle_config.get(
        'yarn_endpoint', provider_config.get('yarn_endpoint'))
    self.alt_yarn_endpoint = lifecycle_config.get(
        'alt_yarn_endpoint', provider_config.get('alt_yarn_endpoint'))
    self.yarn_endpoint_security_enabled = lifecycle_config.get(
        'yarn_endpoint_security_enabled',
        provider_config.get('yarn_endpoint_security_enabled', False))

    endpoints = None
    primary = self.yarn_endpoint
    if primary:
        # The alternate endpoint is consulted only when a primary one is set.
        alternate = self.alt_yarn_endpoint
        endpoints = [primary, alternate] if alternate else [primary]

    if self.yarn_endpoint_security_enabled:
        from requests_kerberos import HTTPKerberosAuth

        auth = HTTPKerberosAuth()
    else:
        auth = None

    self.resource_mgr = ResourceManager(service_endpoints=endpoints, auth=auth)
    self.rm_addr = self.resource_mgr.get_active_endpoint()

    # TODO - fix wait time - should just add member to k-m.
    # YARN applications tend to need more than the default 5 second shutdown
    # wait.  Instead of requiring a command-line option, the wait is raised to
    # ``yarn_shutdown_wait_time`` (env-driven, defaulting to 15 seconds) -- but
    # only when the currently configured wait is shorter than that.
    if kernel_manager.shutdown_wait_time < yarn_shutdown_wait_time:
        kernel_manager.shutdown_wait_time = yarn_shutdown_wait_time
        self.log.debug(
            "{class_name} shutdown wait time adjusted to {wait_time} seconds.".format(
                class_name=type(self).__name__,
                wait_time=kernel_manager.shutdown_wait_time))
def __init__(self, kernel_manager, proxy_config):
    """Initialise the proxy, its YARN client and YARN-specific timeouts.

    Each YARN setting is taken from ``proxy_config`` when the key is present
    there, and from ``kernel_manager.parent.parent`` otherwise.

    :param kernel_manager: kernel manager owning this proxy; its
        ``shutdown_wait_time`` may be raised by this constructor.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)
    self.application_id = None
    self.rm_addr = None

    fallback = kernel_manager.parent.parent
    self.yarn_endpoint = proxy_config.get('yarn_endpoint', fallback.yarn_endpoint)
    self.alt_yarn_endpoint = proxy_config.get(
        'alt_yarn_endpoint', fallback.alt_yarn_endpoint)
    self.yarn_endpoint_security_enabled = proxy_config.get(
        'yarn_endpoint_security_enabled',
        fallback.yarn_endpoint_security_enabled)

    # Host/port pairs stay None unless the matching endpoint is configured.
    rm_kwargs = {'address': None, 'port': None, 'alt_address': None, 'alt_port': None}
    if self.yarn_endpoint:
        primary_url = urlparse(self.yarn_endpoint)
        rm_kwargs.update(address=primary_url.hostname, port=primary_url.port)

        # The alternate endpoint is consulted only when a primary one is set.
        if self.alt_yarn_endpoint:
            alternate_url = urlparse(self.alt_yarn_endpoint)
            rm_kwargs.update(
                alt_address=alternate_url.hostname, alt_port=alternate_url.port)

    self.resource_mgr = ResourceManager(
        kerberos_enabled=self.yarn_endpoint_security_enabled, **rm_kwargs)

    # Record which resource manager the client reports as active.
    host, port = self.resource_mgr.get_active_host_port()
    self.rm_addr = host + ':' + str(port)

    # YARN applications tend to need more than the default 5 second shutdown
    # wait.  Instead of requiring a command-line option, the wait is raised to
    # ``yarn_shutdown_wait_time`` (env-driven, defaulting to 15 seconds) -- but
    # only when the currently configured wait is shorter than that.
    if kernel_manager.shutdown_wait_time < yarn_shutdown_wait_time:
        kernel_manager.shutdown_wait_time = yarn_shutdown_wait_time
        self.log.debug(
            "{class_name} shutdown wait time adjusted to {wait_time} seconds.".format(
                class_name=type(self).__name__,
                wait_time=kernel_manager.shutdown_wait_time))

    # When the YARN resource check is enabled and resources are not available
    # right away, 20% of kernel_launch_timeout is spent waiting and retrying
    # at a fixed interval before the launch is declared not feasible.
    self.yarn_resource_check_wait_time = 0.20 * self.kernel_launch_timeout
def getAppElapsedTime():
    """Return ``(application id, elapsed time)`` pairs for the YARN applications.

    The data is fetched through ``getYarnApplicationsData`` using a
    ``ResourceManager`` for ``localhost:8088``.  An empty list is returned
    when no data comes back.

    :return: list of ``(app['id'], app['elapsedTime'])`` tuples.
    """
    elapsed = []
    manager = ResourceManager(address='localhost', port=8088)
    payload = getYarnApplicationsData(manager)
    if not payload:
        return elapsed

    try:
        app_section = payload['apps']
        if app_section:
            for entry in app_section['app']:
                elapsed.append((entry['id'], entry['elapsedTime']))
    except KeyError:
        # Best effort: a missing key ends the scan, and whatever was
        # collected up to that point is still returned.
        pass
    return elapsed
def getYarnApps():
    """Return a ``YarnApp`` record for each YARN application.

    The data is fetched through ``getYarnApplicationsData`` using a
    ``ResourceManager`` for ``localhost:8088``.  An empty list is returned
    when no data comes back.

    :return: list of ``YarnApp`` built from id, name, the tracking URL's
        third ``:``-separated piece, state and elapsed time.
    """
    collected = []
    manager = ResourceManager(address='localhost', port=8088)
    payload = getYarnApplicationsData(manager)
    if not payload:
        return collected

    try:
        app_section = payload['apps']
        if app_section:
            for entry in app_section['app']:
                tracking_url = entry['trackingUrl']
                # Everything after the second ':' (i.e. "<port>/<path>" for a
                # "scheme://host:port/path" URL).
                # NOTE(review): a URL with fewer than two ':' raises an
                # IndexError that is not caught here -- confirm whether such
                # URLs can occur.
                port_path = tracking_url.split(':')[2]
                record = YarnApp._make((
                    entry['id'],
                    entry['name'],
                    port_path,
                    entry['state'],
                    entry['elapsedTime'],
                ))
                collected.append(record)
    except KeyError:
        # Best effort: a missing key ends the scan, and whatever was
        # collected up to that point is still returned.
        pass
    return collected
def __init__(self, kernel_manager, proxy_config):
    """Initialise the proxy, its YARN client and the shutdown wait time.

    ``proxy_config`` entries win over the values found on
    ``kernel_manager.parent.parent`` whenever the key is present.

    :param kernel_manager: kernel manager owning this proxy; its
        ``shutdown_wait_time`` may be raised by this constructor.
    :param proxy_config: per-kernel process-proxy settings, read via ``.get()``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager, proxy_config)
    self.application_id = None
    self.rm_addr = None

    fallback = kernel_manager.parent.parent
    self.yarn_endpoint = proxy_config.get('yarn_endpoint', fallback.yarn_endpoint)
    self.yarn_endpoint_security_enabled = proxy_config.get(
        'yarn_endpoint_security_enabled',
        fallback.yarn_endpoint_security_enabled)

    # Host and port stay None when no endpoint is configured.
    yarn_master = yarn_port = None
    if self.yarn_endpoint:
        endpoint_url = urlparse(self.yarn_endpoint)
        yarn_master, yarn_port = endpoint_url.hostname, endpoint_url.port

    self.resource_mgr = ResourceManager(
        address=yarn_master,
        port=yarn_port,
        kerberos_enabled=self.yarn_endpoint_security_enabled)

    # Temporary until yarn-api-client can report host/port itself when no
    # master is given.
    # NOTE(review): an endpoint without an explicit port yields "host:None".
    if yarn_master is not None:
        self.rm_addr = yarn_master + ':' + str(yarn_port)
    else:
        self.rm_addr = '(see yarn-site.xml)'

    # YARN applications tend to need more than the default 5 second shutdown
    # wait.  Instead of requiring a command-line option, the wait is raised to
    # ``yarn_shutdown_wait_time`` (env-driven, defaulting to 15 seconds) -- but
    # only when the currently configured wait is shorter than that.
    if kernel_manager.shutdown_wait_time < yarn_shutdown_wait_time:
        kernel_manager.shutdown_wait_time = yarn_shutdown_wait_time
        self.log.debug(
            "{class_name} shutdown wait time adjusted to {wait_time} seconds.".format(
                class_name=type(self).__name__,
                wait_time=kernel_manager.shutdown_wait_time))
def test__init__(self, get_config_mock, request_mock):
    """A default-constructed ``ResourceManager`` looks up config and uses https.

    :param get_config_mock: mock standing in for the config lookup.
    :param request_mock: injected mock; not used directly by this test.
    """
    get_config_mock.return_value = "https:localhost"

    manager = ResourceManager()

    # The lookup must have been made with exactly these arguments.
    get_config_mock.assert_called_with(30, None, True)
    uses_https = manager.service_uri.is_https
    self.assertEqual(uses_https, True)
def setUp(self, check_is_active_rm_mock):
    """Create the ``ResourceManager`` under test with the active-RM check mocked.

    :param check_is_active_rm_mock: mock for the active-RM check; made to
        return ``True``.
    """
    check_is_active_rm_mock.return_value = True
    service_endpoints = ['localhost']
    self.rm = ResourceManager(service_endpoints)
def __init__(self, kernel_manager):
    """Initialise the proxy and point a ``ResourceManager`` at the YARN host.

    :param kernel_manager: kernel manager owning this proxy; the endpoint is
        read from ``kernel_manager.parent.parent.yarn_endpoint``.
    """
    super(YarnClusterProcessProxy, self).__init__(kernel_manager)
    self.application_id = None

    # Endpoint value originates from the command line or the environment.
    self.yarn_endpoint = kernel_manager.parent.parent.yarn_endpoint

    # Only the host portion of the endpoint URL is handed to the client.
    endpoint_url = urlparse(self.yarn_endpoint)
    self.resource_mgr = ResourceManager(address=endpoint_url.hostname)
def test__init__(self, get_config_mock, request_mock):
    """A default-constructed ``ResourceManager`` calls the config lookup with no arguments.

    :param get_config_mock: mock standing in for the config lookup.
    :param request_mock: injected mock; not used directly by this test.
    """
    no_host_no_port = (None, None)
    get_config_mock.return_value = no_host_no_port

    ResourceManager()

    get_config_mock.assert_called_with()
def setUp(self):
    """Create the ``ResourceManager`` under test, addressed at ``localhost``."""
    address = 'localhost'
    self.rm = ResourceManager(address)