def _drain_hosts(self, drainable_hosts):
  """Drains tasks from the specified hosts.

  This will move active tasks on these hosts to the DRAINING state, causing them to be
  rescheduled elsewhere.

  :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
  :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
  :rtype: set of host names that failed to drain
  """
  check_and_log_response(self._client.drain_hosts(drainable_hosts))
  drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

  total_wait = self.STATUS_POLL_INTERVAL
  not_drained_hostnames = set(drainable_hostnames)
  while not self._wait_event.is_set() and not_drained_hostnames:
    log.info("Waiting for hosts to be in DRAINED: %s" % not_drained_hostnames)
    self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))

    statuses = self.check_status(list(not_drained_hostnames))
    not_drained_hostnames = set(h[0] for h in statuses if h[1] != "DRAINED")

    total_wait += self.STATUS_POLL_INTERVAL
    if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
      log.warning("Failed to move all hosts into DRAINED within %s:\n%s" % (
          self.MAX_STATUS_WAIT,
          "\n".join("\tHost:%s\tStatus:%s" % h for h in sorted(statuses) if h[1] != "DRAINED")))
      break

  return not_drained_hostnames
def _apply_states(self):
  """
    os.stat() the corresponding checkpoint stream of this task and determine if there are new
    ckpt records.  Attempt to read those records and update the high watermark for that stream.
    Returns True if new states were applied, False otherwise.
  """
  ckpt_offset = None
  try:
    ckpt_offset = os.stat(self._runner_ckpt).st_size

    updated = False
    if self._ckpt_head < ckpt_offset:
      with open(self._runner_ckpt, "r") as fp:
        fp.seek(self._ckpt_head)
        rr = ThriftRecordReader(fp, RunnerCkpt)
        while True:
          runner_update = rr.try_read()
          if not runner_update:
            break
          try:
            self._dispatcher.dispatch(self._runnerstate, runner_update)
          except CheckpointDispatcher.InvalidSequenceNumber as e:
            log.error("Checkpoint stream is corrupt: %s" % e)
            break
        new_ckpt_head = fp.tell()
        updated = self._ckpt_head != new_ckpt_head
        self._ckpt_head = new_ckpt_head
    return updated
  except OSError as e:
    if e.errno == errno.ENOENT:
      # The log doesn't yet exist, will retry later.
      log.warning("Could not read from checkpoint %s" % self._runner_ckpt)
      return False
    else:
      raise
def _fast_forward_stream(self, process_name):
  log.debug('Fast forwarding %s stream to seq=%s' % (
      process_name, self._watermarks[process_name]))
  assert self._processes.get(process_name) is not None
  fp = self._processes[process_name]
  rr = ThriftRecordReader(fp, RunnerCkpt)
  current_watermark = -1
  records = 0
  while current_watermark < self._watermarks[process_name]:
    last_pos = fp.tell()
    record = rr.try_read()
    if record is None:
      break
    new_watermark = record.process_status.seq
    if new_watermark > self._watermarks[process_name]:
      log.debug('Over-seeked %s [watermark = %s, high watermark = %s], rewinding.' % (
          process_name, new_watermark, self._watermarks[process_name]))
      fp.seek(last_pos)
      break
    current_watermark = new_watermark
    records += 1

  if current_watermark < self._watermarks[process_name]:
    log.warning('Only able to fast forward to %s@sequence=%s, high watermark is %s' % (
        process_name, current_watermark, self._watermarks[process_name]))

  if records:
    log.debug('Fast forwarded %s %s record(s) to seq=%s.' % (
        process_name, records, current_watermark))
def method_wrapper(*args):
  with self._lock:
    start = time.time()
    while not self._terminating.is_set() and (
        time.time() - start) < self.RPC_MAXIMUM_WAIT.as_(Time.SECONDS):

      # Only automatically append a SessionKey if this is not part of the read-only API.
      auth_args = () if hasattr(ReadOnlyScheduler.Iface, method_name) else (self.session_key(),)
      try:
        method = getattr(self.client(), method_name)
        if not callable(method):
          return method

        resp = method(*(args + auth_args))
        if resp is not None and resp.responseCode == ResponseCode.ERROR_TRANSIENT:
          raise self.TransientError(", ".join(
              [m.message for m in resp.details] if resp.details else []))
        if resp.serverInfo.thriftAPIVersion != THRIFT_API_VERSION:
          raise self.APIVersionError("Client Version: %s, Server Version: %s" %
                                     (THRIFT_API_VERSION, resp.serverInfo.thriftAPIVersion))
        return resp
      except (TTransport.TTransportException, self.TimeoutError, self.TransientError) as e:
        if not self._terminating.is_set():
          log.warning('Connection error with scheduler: %s, reconnecting...' % e)
          self.invalidate()
          self._terminating.wait(self.RPC_RETRY_INTERVAL.as_(Time.SECONDS))
      except Exception as e:
        # Take any error that occurs during the RPC call, and transform it
        # into something clients can handle.
        if not self._terminating.is_set():
          raise self.ThriftInternalError("Error during thrift call %s to %s: %s" % (
              method_name, self.cluster.name, e))

    if not self._terminating.is_set():
      raise self.TimeoutError('Timed out attempting to issue %s to %s' % (
          method_name, self.cluster.name))
def on_finish(service_instance):
  try:
    self._members[member_id] = ServiceInstance.unpack(service_instance)
  except Exception as e:
    log.warning('Failed to deserialize endpoint: %s' % e)
    return
  self._on_join(self._members[member_id])
def _drain_hosts(self, drainable_hosts):
  """Drains tasks from the specified hosts.

  This will move active tasks on these hosts to the DRAINING state, causing them to be
  rescheduled elsewhere.

  :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
  :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
  """
  check_and_log_response(self._client.drain_hosts(drainable_hosts))
  not_ready_hostnames = [hostname for hostname in drainable_hosts.hostNames]
  while not_ready_hostnames:
    resp = self._client.maintenance_status(Hosts(set(not_ready_hostnames)))
    if not resp.result.maintenanceStatusResult.statuses:
      not_ready_hostnames = None
    for host_status in resp.result.maintenanceStatusResult.statuses:
      if host_status.mode != MaintenanceMode.DRAINED:
        log.warning('%s is currently in status %s' %
            (host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
      else:
        not_ready_hostnames.remove(host_status.host)
def setup_child_subreaping():
  """
  This uses the prctl(2) syscall to set the `PR_SET_CHILD_SUBREAPER` flag. This
  means if any child processes need to be reparented, they will be reparented to
  this process.

  More documentation here: http://man7.org/linux/man-pages/man2/prctl.2.html
  and here: https://lwn.net/Articles/474787/

  Callers should reap terminal children to prevent zombies.
  """
  log.debug("Calling prctl(2) with PR_SET_CHILD_SUBREAPER")
  # This constant is taken from prctl.h
  PR_SET_CHILD_SUBREAPER = 36
  try:
    library_name = ctypes.util.find_library('c')
    if library_name is None:
      log.warning("libc is not found. Unable to call prctl!")
      log.warning("Children subreaping is disabled!")
      return
    libc = ctypes.CDLL(library_name, use_errno=True)
    # If we are on a system where prctl doesn't exist, this will throw an
    # attribute error.
    ret = libc.prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0)
    if ret != 0:
      errno = ctypes.get_errno()
      raise OSError(errno, os.strerror(errno))
  except Exception as e:
    log.error("Unable to call prctl %s" % e)
    log.error("Children subreaping is disabled!")
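# Hedged usage sketch (not part of the original source): the docstring above says callers
# should reap terminal children to prevent zombies, so a process that calls
# setup_child_subreaping() at startup would periodically run a reap loop like the one below.
# It uses only standard-library calls (os.waitpid with os.WNOHANG); the helper name is made up.
import errno
import os


def reap_terminated_children():
  """Reap any children that have already exited; return the set of reaped pids."""
  reaped = set()
  while True:
    try:
      pid, _ = os.waitpid(-1, os.WNOHANG)  # non-blocking wait for any child
      if pid == 0:  # children exist, but none have exited yet
        break
      reaped.add(pid)
    except OSError as e:
      if e.errno != errno.ECHILD:  # ECHILD: no children left to wait for
        raise
      break
  return reaped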
def allocate_port(self, name, port=None):
  if port is not None:
    if name in self._ports and self._ports[name] != port:
      raise EphemeralPortAllocator.PortConflict(
          'Port binding %s=>%s conflicts with current binding %s=>%s' % (
              name, port, name, self._ports[name]))
    else:
      self._ports[name] = port
      return port

  if name in self._ports:
    return self._ports[name]

  while True:
    rand_port = random.randint(*EphemeralPortAllocator.SOCKET_RANGE)
    # if this ever needs to be performant, make a peer set.
    if rand_port in self._ports.values():
      continue
    try:
      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      s.bind(('localhost', rand_port))
      s.close()
      self._ports[name] = rand_port
      break
    except OSError as e:
      if e.errno == errno.EADDRINUSE:
        log.warning('Could not bind port: %s' % e)
        time.sleep(0.2)
        continue
      else:
        raise
  return self._ports[name]
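# Minimal standalone sketch of the probe-and-bind technique allocate_port() uses: pick a random
# candidate port, try to bind it on localhost, and retry on EADDRINUSE until a free one is found.
# The range constant and function name are assumptions for illustration, not from the original.
import errno
import random
import socket

EPHEMERAL_RANGE = (32768, 60999)  # assumed range, for illustration only


def probe_free_port():
  """Return a port number that was bindable on localhost at probe time."""
  while True:
    candidate = random.randint(*EPHEMERAL_RANGE)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
      s.bind(('localhost', candidate))
      return candidate
    except OSError as e:
      if e.errno != errno.EADDRINUSE:
        raise
      # port already taken; try another candidate
    finally:
      s.close()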
def _complete_maintenance(self, drained_hosts):
  """End the maintenance status for a given set of hosts."""
  check_and_log_response(self._client.end_maintenance(drained_hosts))
  resp = self._client.maintenance_status(drained_hosts)
  for host_status in resp.result.maintenanceStatusResult.statuses:
    if host_status.mode != MaintenanceMode.NONE:
      log.warning('%s is DRAINING or in DRAINED' % host_status.host)
def _drain_hosts(self, drainable_hosts, clock=time):
  """Drains tasks from the specified hosts.

  This will move active tasks on these hosts to the DRAINING state, causing them to be
  rescheduled elsewhere.

  :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
  :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
  :param clock: time module for testing
  :type clock: time
  """
  check_and_log_response(self._client.drain_hosts(drainable_hosts))
  not_ready_hostnames = [hostname for hostname in drainable_hosts.hostNames]
  while not_ready_hostnames:
    log.info("Sleeping for %s." % self.START_MAINTENANCE_DELAY)
    clock.sleep(self.START_MAINTENANCE_DELAY.as_(Time.SECONDS))
    resp = self._client.maintenance_status(Hosts(set(not_ready_hostnames)))
    if not resp.result.maintenanceStatusResult.statuses:
      not_ready_hostnames = None
    for host_status in resp.result.maintenanceStatusResult.statuses:
      if host_status.mode != MaintenanceMode.DRAINED:
        log.warning('%s is currently in status %s' %
            (host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
      else:
        not_ready_hostnames.remove(host_status.host)
def _construct_scheduler(self):
  """
    Populates:
      self._scheduler
      self._client
  """
  self._scheduler = SchedulerClient.get(self.cluster, verbose=self.verbose)
  assert self._scheduler, "Could not find scheduler (cluster = %s)" % self.cluster.name
  start = time.time()
  while (time.time() - start) < self.CONNECT_MAXIMUM_WAIT.as_(Time.SECONDS):
    try:
      self._client = self._scheduler.get_thrift_client()
      break
    except SchedulerClient.CouldNotConnect as e:
      log.warning('Could not connect to scheduler: %s' % e)
  if not self._client:
    raise self.TimeoutError('Timed out trying to connect to scheduler at %s' % self.cluster.name)
  server_version = self._client.getVersion().result.getVersionResult
  if server_version != CURRENT_API_VERSION:
    raise self.APIVersionError("Client Version: %s, Server Version: %s" %
                               (CURRENT_API_VERSION, server_version))
def _apply_states(self):
  """
    os.stat() the corresponding checkpoint stream of this task and determine if there are new
    ckpt records.  Attempt to read those records and update the high watermark for that stream.
    Returns True if new states were applied, False otherwise.
  """
  ckpt_offset = None
  try:
    ckpt_offset = os.stat(self._runner_ckpt).st_size

    updated = False
    if self._ckpt_head < ckpt_offset:
      with open(self._runner_ckpt, 'r') as fp:
        fp.seek(self._ckpt_head)
        rr = ThriftRecordReader(fp, RunnerCkpt)
        while True:
          runner_update = rr.try_read()
          if not runner_update:
            break
          try:
            self._dispatcher.dispatch(self._runnerstate, runner_update)
          except CheckpointDispatcher.InvalidSequenceNumber as e:
            log.error('Checkpoint stream is corrupt: %s' % e)
            break
        new_ckpt_head = fp.tell()
        updated = self._ckpt_head != new_ckpt_head
        self._ckpt_head = new_ckpt_head
    return updated
  except OSError as e:
    if e.errno == errno.ENOENT:
      # The log doesn't yet exist, will retry later.
      log.warning('Could not read from checkpoint %s' % self._runner_ckpt)
      return False
    else:
      raise
def get_completion(result):
  try:
    children = result.get()
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, do_monitor)
    return
  except ke.NoNodeError:
    wait_exists()
    return
  except ke.KazooException as e:
    log.warning('Unexpected get_completion result: (%s)%s' % (type(e), e))
    return

  children = [child for child in children if self.znode_owned(child)]
  _, new = self._update_children(children)
  for child in new:
    def devnull(*args, **kw):
      pass
    self.info(child, callback=devnull)

  monitor_queue = self._monitor_queue[:]
  self._monitor_queue = []
  members = set(Membership(self.znode_to_id(child)) for child in children)
  for membership, capture in monitor_queue:
    if set(membership) != members:
      capture.set(members)
    else:
      self._monitor_queue.append((membership, capture))
def method_wrapper(*args):
  with self._lock:
    start = time.time()
    while not self._terminating.is_set() and (
        time.time() - start) < self.RPC_MAXIMUM_WAIT.as_(Time.SECONDS):
      auth_args = () if method_name in self.UNAUTHENTICATED_RPCS else (self.session_key(),)
      try:
        method = getattr(self.client(), method_name)
        if not callable(method):
          return method
        return method(*(args + auth_args))
      except (TTransport.TTransportException, self.TimeoutError) as e:
        if not self._terminating.is_set():
          log.warning('Connection error with scheduler: %s, reconnecting...' % e)
          self.invalidate()
          self._terminating.wait(self.RPC_RETRY_INTERVAL.as_(Time.SECONDS))
      except Exception as e:
        # Take any error that occurs during the RPC call, and transform it
        # into something clients can handle.
        if not self._terminating.is_set():
          raise self.ThriftInternalError("Error during thrift call %s to %s: %s" % (
              method_name, self.cluster.name, e))

    if not self._terminating.is_set():
      raise self.TimeoutError('Timed out attempting to issue %s to %s' % (
          method_name, self.cluster.name))
def method_wrapper(*args):
  with self._lock:
    start = time.time()
    while not self._terminating.is_set() and (
        time.time() - start) < self.RPC_MAXIMUM_WAIT.as_(Time.SECONDS):
      try:
        method = getattr(self.client(), method_name)
        if not callable(method):
          return method

        resp = method(*args)
        if resp is not None and resp.responseCode == ResponseCode.ERROR_TRANSIENT:
          raise self.TransientError(", ".join(
              [m.message for m in resp.details] if resp.details else []))
        return resp
      except TRequestsTransport.AuthError as e:
        log.error(self.scheduler_client().get_failed_auth_message())
        raise self.AuthError(e)
      except (TTransport.TTransportException, self.TimeoutError, self.TransientError) as e:
        if not self._terminating.is_set():
          log.warning('Connection error with scheduler: %s, reconnecting...' % e)
          self.invalidate()
          self._terminating.wait(self.RPC_RETRY_INTERVAL.as_(Time.SECONDS))
      except Exception as e:
        # Take any error that occurs during the RPC call, and transform it
        # into something clients can handle.
        if not self._terminating.is_set():
          raise self.ThriftInternalError("Error during thrift call %s to %s: %s" % (
              method_name, self.cluster.name, e))

    if not self._terminating.is_set():
      raise self.TimeoutError('Timed out attempting to issue %s to %s' % (
          method_name, self.cluster.name))
def __call__(self, endpoint, use_post_method=False, expected_response=None,
             expected_response_code=None):
  """Returns a (boolean, string|None) tuple of (call success, failure reason)"""
  try:
    response, response_code = self.query(endpoint, "" if use_post_method else None)
    response = response.strip().lower()
    if expected_response and response != expected_response.lower():
      reason = 'Response differs from expected response (expected "%s", got "%s")'

      def shorten(string):
        return (string if len(string) < self.FAILURE_REASON_LENGTH
                else "%s..." % string[:self.FAILURE_REASON_LENGTH - 3])

      log.warning(reason % (expected_response, response))
      return (False, reason % (shorten(str(expected_response)), shorten(str(response))))
    elif expected_response_code and response_code != expected_response_code:
      reason = "Response code differs from expected response (expected %i, got %i)"
      log.warning(reason % (expected_response_code, response_code))
      return (False, reason % (expected_response_code, response_code))
    else:
      return (True, None)
  except self.QueryError as e:
    return (False, str(e))
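# Small illustration of the truncation the shorten() helper above applies to failure reasons:
# strings at or over the length limit are cut to limit - 3 characters and suffixed with '...'.
# The limit value and standalone function are assumptions for illustration only.
FAILURE_REASON_LENGTH = 10  # assumed value


def shorten(string, limit=FAILURE_REASON_LENGTH):
  return string if len(string) < limit else '%s...' % string[:limit - 3]


assert shorten('ok') == 'ok'
assert shorten('unexpected body') == 'unexpec...'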
def method_wrapper(*args):
  with self._lock:
    start = time.time()

    # TODO(wfarner): The while loop causes failed unit tests to spin for the retry
    # period (currently 10 minutes).  Figure out a better approach.
    while not self._terminating.is_set() and (
        time.time() - start) < self.RPC_MAXIMUM_WAIT.as_(Time.SECONDS):

      # Only automatically append a SessionKey if this is not part of the read-only API.
      auth_args = () if hasattr(ReadOnlyScheduler.Iface, method_name) else (self.session_key(),)
      try:
        method = getattr(self.client(), method_name)
        if not callable(method):
          return method
        return method(*(args + auth_args))
      except (TTransport.TTransportException, self.TimeoutError) as e:
        if not self._terminating.is_set():
          log.warning('Connection error with scheduler: %s, reconnecting...' % e)
          self.invalidate()
          self._terminating.wait(self.RPC_RETRY_INTERVAL.as_(Time.SECONDS))
      except Exception as e:
        # Take any error that occurs during the RPC call, and transform it
        # into something clients can handle.
        if not self._terminating.is_set():
          raise self.ThriftInternalError("Error during thrift call %s to %s: %s" % (
              method_name, self.cluster.name, e))

    if not self._terminating.is_set():
      raise self.TimeoutError('Timed out attempting to issue %s to %s' % (
          method_name, self.cluster.name))
def _drain_hosts(self, drainable_hosts):
  """Drains tasks from the specified hosts.

  This will move active tasks on these hosts to the DRAINING state, causing them to be
  rescheduled elsewhere.

  :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
  :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
  :rtype: set of host names that failed to drain
  """
  check_and_log_response(self._client.drain_hosts(drainable_hosts))
  drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

  total_wait = self.STATUS_POLL_INTERVAL
  not_drained_hostnames = set(drainable_hostnames)
  while not self._wait_event.is_set() and not_drained_hostnames:
    log.info('Waiting for hosts to be in DRAINED: %s' % not_drained_hostnames)
    self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))

    statuses = self.check_status(list(not_drained_hostnames))
    not_drained_hostnames = set(h[0] for h in statuses if h[1] != 'DRAINED')

    total_wait += self.STATUS_POLL_INTERVAL
    if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
      log.warning('Failed to move all hosts into DRAINED within %s:\n%s' % (
          self.MAX_STATUS_WAIT,
          '\n'.join("\tHost:%s\tStatus:%s" % h for h in sorted(statuses) if h[1] != 'DRAINED')))
      break

  return not_drained_hostnames
def acreate_completion(result):
  try:
    # TODO(wickman) Kazoo has a bug:
    #   https://github.com/python-zk/kazoo/issues/106
    #   https://github.com/python-zk/kazoo/pull/107
    # Remove this once 1.3 is cut.
    path = self._zk.unchroot(result.get())
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, do_join)
    return
  except ke.KazooException as e:
    log.warning('Unexpected Kazoo result in join: (%s)%s' % (type(e), e))
    membership = Membership.error()
  else:
    created_id = self.znode_to_id(path)
    membership = Membership(created_id)
    with self._member_lock:
      result_future = self._members.get(membership, Future())
      result_future.set_result(blob)
      self._members[membership] = result_future
    if expire_callback:
      self._once(KazooState.CONNECTED, expire_notifier)
      do_exists(path)
  membership_capture.set(membership)
def __call__(self, endpoint, use_post_method=False, expected_response=None,
             expected_response_code=None):
  """Returns a (boolean, string|None) tuple of (call success, failure reason)"""
  try:
    response, response_code = self.query(endpoint, '' if use_post_method else None)
    response = response.strip().lower()
    if expected_response and response != expected_response.lower():
      reason = 'Response differs from expected response (expected "%s", got "%s")'

      def shorten(string):
        return (string if len(string) < self.FAILURE_REASON_LENGTH
                else "%s..." % string[:self.FAILURE_REASON_LENGTH - 3])

      log.warning(reason % (expected_response, response))
      return (False, reason % (shorten(str(expected_response)), shorten(str(response))))
    elif expected_response_code and response_code != expected_response_code:
      reason = 'Response code differs from expected response (expected %i, got %i)'
      log.warning(reason % (expected_response_code, response_code))
      return (False, reason % (expected_response_code, response_code))
    else:
      return (True, None)
  except self.QueryError as e:
    return (False, str(e))
def _fast_forward_stream(self, process_name):
  log.debug('Fast forwarding %s stream to seq=%s' % (
      process_name, self._watermarks[process_name]))
  assert self._processes.get(process_name) is not None
  fp = self._processes[process_name]
  rr = ThriftRecordReader(fp, RunnerCkpt)
  current_watermark = -1
  records = 0
  while current_watermark < self._watermarks[process_name]:
    last_pos = fp.tell()
    record = rr.try_read()
    if record is None:
      break
    new_watermark = record.process_status.seq
    if new_watermark > self._watermarks[process_name]:
      log.debug('Over-seeked %s [watermark = %s, high watermark = %s], rewinding.' % (
          process_name, new_watermark, self._watermarks[process_name]))
      fp.seek(last_pos)
      break
    current_watermark = new_watermark
    records += 1

  if current_watermark < self._watermarks[process_name]:
    log.warning('Only able to fast forward to %s@sequence=%s, high watermark is %s' % (
        process_name, current_watermark, self._watermarks[process_name]))

  if records:
    log.debug('Fast forwarded %s %s record(s) to seq=%s.' % (
        process_name, records, current_watermark))
def get_completion(result):
  try:
    children = result.get()
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, do_monitor)
    return
  except ke.NoNodeError:
    wait_exists()
    return
  except ke.KazooException as e:
    log.warning('Unexpected get_completion result: (%s)%s' % (type(e), e))
    return

  children = [child for child in children if self.znode_owned(child)]
  _, new = self._update_children(children)
  for child in new:
    def devnull(*args, **kw):
      pass
    self.info(child, callback=devnull)

  monitor_queue = self._monitor_queue[:]
  self._monitor_queue = []
  members = set(Membership(self.znode_to_id(child)) for child in children)
  for membership, capture in monitor_queue:
    if set(membership) != members:
      capture.set(members)
    else:
      self._monitor_queue.append((membership, capture))
def sample(self):
  """ Collate and aggregate ProcessSamples for process and children
      Returns None: result is stored in self.value
  """
  try:
    last_sample, last_stamp = self._sample, self._stamp
    if self._process is None:
      self._process = Process(self._pid)
    parent = self._process
    parent_sample = process_to_sample(parent)
    new_samples = dict(
        (proc.pid, process_to_sample(proc))
        for proc in parent.get_children(recursive=True))
    new_samples[self._pid] = parent_sample
  except PsutilError as e:
    log.warning("Error during process sampling: %s" % e)
    self._sample = ProcessSample.empty()
    self._rate = 0.0
  else:
    last_stamp = self._stamp
    self._stamp = time()
    # for most stats, calculate simple sum to aggregate
    self._sample = sum(new_samples.values(), ProcessSample.empty())
    # cpu consumption is more complicated
    # We require at least 2 generations of a process before we can calculate rate, so for all
    # current processes that were not running in the previous sample, compare to an empty sample
    if self._sampled_tree and last_stamp:
      new = new_samples.values()
      old = [self._sampled_tree.get(pid, ProcessSample.empty()) for pid in new_samples.keys()]
      new_user_sys = sum(map(attrgetter("user"), new)) + sum(map(attrgetter("system"), new))
      old_user_sys = sum(map(attrgetter("user"), old)) + sum(map(attrgetter("system"), old))
      self._rate = (new_user_sys - old_user_sys) / (self._stamp - last_stamp)
      log.debug("Calculated rate for pid=%s and children: %s" % (self._process.pid, self._rate))
    self._sampled_tree = new_samples
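# Worked example (made-up numbers) of the rate calculation above: aggregate user + system CPU
# seconds over the whole process tree for two consecutive samples, then divide the delta by the
# wall-clock time between samples to get a CPU utilization rate.
old_user_sys = (1.0 + 0.2) + (3.0 + 0.4)   # two processes in the previous sample
new_user_sys = (1.5 + 0.3) + (3.2 + 0.5)   # the same processes one sample later
stamp_delta = 2.0                          # seconds between samples

rate = (new_user_sys - old_user_sys) / stamp_delta
assert abs(rate - 0.45) < 1e-9             # 0.9 CPU-seconds consumed over 2 wall-clock seconds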
def disambiguate_args_or_die(cls, args, options, client_factory=AuroraClientAPI):
  """
  Returns a (AuroraClientAPI, AuroraJobKey, AuroraConfigFile:str) tuple
  if one can be found given the args, potentially querying the scheduler with the returned client.
  Calls die() with an appropriate error message otherwise.

  Arguments:
    args: args from app command invocation.
    options: options from app command invocation. must have env and cluster attributes.
    client_factory: a callable (cluster) -> AuroraClientAPI.
  """
  if not len(args) > 0:
    die('job path is required')
  try:
    job_key = AuroraJobKey.from_path(args[0])
    client = client_factory(job_key.cluster)
    config_file = args[1] if len(args) > 1 else None  # the config for hooks
    return client, job_key, config_file
  except AuroraJobKey.Error:
    log.warning("Failed to parse job path, falling back to compatibility mode")
    role = args[0] if len(args) > 0 else None
    name = args[1] if len(args) > 1 else None
    env = None
    config_file = None  # deprecated form does not support hooks functionality
    cluster = options.cluster
    if not cluster:
      die('cluster is required')
    client = client_factory(cluster)
    return client, cls._disambiguate_or_die(client, role, env, name), config_file
def _construct_scheduler(self):
  """
    Populates:
      self._scheduler_client
      self._client
  """
  self._scheduler_client = SchedulerClient.get(self.cluster, verbose=self.verbose)
  assert self._scheduler_client, "Could not find scheduler (cluster = %s)" % self.cluster.name
  start = time.time()
  while (time.time() - start) < self.CONNECT_MAXIMUM_WAIT.as_(Time.SECONDS):
    try:
      # this can wind up generating any kind of error, because it turns into
      # a call to a dynamically set authentication module.
      self._client = self._scheduler_client.get_thrift_client()
      break
    except SchedulerClient.CouldNotConnect as e:
      log.warning('Could not connect to scheduler: %s' % e)
    except Exception as e:
      # turn any auth module exception into an auth error.
      log.debug('Warning: got an unknown exception during authentication:')
      log.debug(traceback.format_exc())
      raise self.AuthenticationError('Error connecting to scheduler: %s' % e)
  if not self._client:
    raise self.TimeoutError('Timed out trying to connect to scheduler at %s' % self.cluster.name)
  server_version = self._client.getVersion().result.getVersionResult
  if server_version != CURRENT_API_VERSION:
    raise self.APIVersionError("Client Version: %s, Server Version: %s" %
                               (CURRENT_API_VERSION, server_version))
def _construct_scheduler(self):
  """
    Populates:
      self._scheduler_client
      self._client
  """
  self._scheduler_client = SchedulerClient.get(self.cluster, verbose=self.verbose, **self._kwargs)
  assert self._scheduler_client, "Could not find scheduler (cluster = %s)" % self.cluster.name
  start = time.time()
  while (time.time() - start) < self.CONNECT_MAXIMUM_WAIT.as_(Time.SECONDS):
    try:
      # this can wind up generating any kind of error, because it turns into
      # a call to a dynamically set authentication module.
      self._client = self._scheduler_client.get_thrift_client()
      break
    except SchedulerClient.CouldNotConnect as e:
      log.warning('Could not connect to scheduler: %s' % e)
    except Exception as e:
      # turn any auth module exception into an auth error.
      log.debug('Warning: got an unknown exception during authentication:')
      log.debug(traceback.format_exc())
      raise self.AuthError('Error connecting to scheduler: %s' % e)
  if not self._client:
    raise self.TimeoutError('Timed out trying to connect to scheduler at %s' % self.cluster.name)
def _drain_hosts(self, drainable_hosts):
  """Drains tasks from the specified hosts.

  This will move active tasks on these hosts to the DRAINING state, causing them to be
  rescheduled elsewhere.

  :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
  :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
  :rtype: set of host names that failed to drain
  """
  check_and_log_response(self._client.drain_hosts(drainable_hosts))
  drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

  total_wait = self.STATUS_POLL_INTERVAL
  not_drained_hostnames = set(drainable_hostnames)
  while not self._wait_event.is_set() and not_drained_hostnames:
    self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))
    not_drained_hostnames = self.check_if_drained(drainable_hostnames)

    total_wait += self.STATUS_POLL_INTERVAL
    if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
      log.warning('Failed to move all hosts into DRAINED within %s' % self.MAX_STATUS_WAIT)
      break

  return not_drained_hostnames
def run(self):
  """Thread entrypoint. Loop indefinitely, polling collectors at self._collection_interval and
  collating samples."""

  log.debug('Commencing resource monitoring for task "%s"' % self._task_id)
  next_process_collection = 0
  next_disk_collection = 0

  while not self._kill_signal.is_set():
    now = time.time()

    if now > next_process_collection:
      next_process_collection = now + self._process_collection_interval
      actives = set(self._get_active_processes())
      current = set(self._process_collectors)
      for process in current - actives:
        self._process_collectors.pop(process)
      for process in actives - current:
        self._process_collectors[process] = self._process_collector_factory(process.pid)
      for process, collector in self._process_collectors.items():
        collector.sample()

    if now > next_disk_collection:
      next_disk_collection = now + self._disk_collection_interval
      if not self._disk_collector:
        sandbox = self._task_monitor.get_sandbox()
        if sandbox:
          self._disk_collector = self._disk_collector_class(sandbox)
      if self._disk_collector:
        self._disk_collector.sample()
      else:
        log.debug('No sandbox detected yet for %s' % self._task_id)

    try:
      aggregated_procs = sum(map(attrgetter('procs'), self._process_collectors.values()))
      aggregated_sample = sum(map(attrgetter('value'), self._process_collectors.values()),
                              ProcessSample.empty())
      disk_value = self._disk_collector.value if self._disk_collector else 0
      self._history.add(now, self.ResourceResult(aggregated_procs, aggregated_sample, disk_value))
    except ValueError as err:
      log.warning("Error recording resource sample: %s" % err)

    # Sleep until any of the following conditions are met:
    # - it's time for the next disk collection
    # - it's time for the next process collection
    # - the result from the last disk collection is available via the DiskCollector
    # - the TaskResourceMonitor has been killed via self._kill_signal
    now = time.time()
    next_collection = min(next_process_collection - now, next_disk_collection - now)

    if self._disk_collector:
      waiter = EventMuxer(self._kill_signal, self._disk_collector.completed_event)
    else:
      waiter = self._kill_signal

    waiter.wait(timeout=max(0, next_collection))

  log.debug('Stopping resource monitoring for task "%s"' % self._task_id)
def __iter__(self):
  """Iterate over the services (ServiceInstance objects) in this ServerSet."""
  for member in self._group.list():
    try:
      yield ServiceInstance.unpack(self._group.info(member))
    except Exception as e:
      log.warning('Failed to deserialize endpoint: %s' % e)
      continue
def run(self):
  self.runner._run_plan(self.runner._finalizing_plan)
  log.debug("TaskRunnerStage[FINALIZING]: Finalization remaining: %s" %
            self.runner._finalization_remaining())
  if self.runner.deadlocked(self.runner._finalizing_plan):
    log.warning("Finalizing plan deadlocked.")
    return None
  if self.runner._finalization_remaining() > 0 and not self.runner._finalizing_plan.is_complete():
    return min(self.runner._finalization_remaining(), self.MAX_ITERATION_WAIT.as_(Time.SECONDS))
def perform_maintenance(self, hostnames, grouping_function=DEFAULT_GROUPING,
                        percentage=None, duration=None, output_file=None):
  """Put hosts into maintenance mode and drain them.

  Walk through the process of putting hosts into maintenance and draining them of tasks. The
  hosts will remain in maintenance mode upon completion.

  :param hostnames: A list of hostnames to operate upon
  :type hostnames: list of strings
  :param grouping_function: How to split up the hostname into groups
  :type grouping_function: function
  :param percentage: SLA percentage to use
  :type percentage: float
  :param duration: SLA duration to use
  :type duration: twitter.common.quantity.Time
  :param output_file: file to write hosts that were not drained due to failed SLA check
  :type output_file: string
  :rtype: set of host names that were successfully drained
  """
  hostnames = self.start_maintenance(hostnames)
  not_drained_hostnames = set()

  for hosts in self.iter_batches(hostnames, grouping_function):
    log.info('Beginning SLA check for %s' % hosts.hostNames)
    unsafe_hostnames = self._check_sla(
        list(hosts.hostNames), grouping_function, percentage, duration)

    if unsafe_hostnames:
      log.warning('Some hosts did not pass SLA check and will not be drained! '
                  'Skipping hosts: %s' % unsafe_hostnames)
      not_drained_hostnames |= unsafe_hostnames
      drainable_hostnames = hosts.hostNames - unsafe_hostnames
      if not drainable_hostnames:
        continue
      hosts = Hosts(drainable_hostnames)
    else:
      log.info('All hosts passed SLA check.')

    self._drain_hosts(hosts)

  if not_drained_hostnames:
    output = '\n'.join(list(not_drained_hostnames))
    log.info('The following hosts did not pass SLA check and were not drained:')
    print(output)
    if output_file:
      try:
        with open(output_file, 'w') as fp:
          fp.write(output)
          fp.write('\n')
        log.info('Written unsafe host names into: %s' % output_file)
      except IOError as e:
        log.error('Failed to write into the output file: %s' % e)

  return set(hostnames) - not_drained_hostnames
def run(self):
  self.runner._run_plan(self.runner._finalizing_plan)
  log.debug('TaskRunnerStage[FINALIZING]: Finalization remaining: %s' %
            self.runner._finalization_remaining())
  if self.runner.deadlocked(self.runner._finalizing_plan):
    log.warning('Finalizing plan deadlocked.')
    return None
  if self.runner._finalization_remaining() > 0 and not self.runner._finalizing_plan.is_complete():
    return min(self.runner._finalization_remaining(), self.MAX_ITERATION_WAIT.as_(Time.SECONDS))
def get_completion(_, rc, children):
  if rc in self._zk.COMPLETION_RETRY:
    do_monitor()
    return
  if rc != zookeeper.OK:
    log.warning('Unexpected get_completion return code: %s' % ZooKeeper.ReturnCode(rc))
    promise.set(set([Membership.error()]))
    return
  self._update_children(children)
  set_different(promise, membership, self._members)
def run(self):
  """Thread entrypoint. Loop indefinitely, polling collectors at self._collection_interval and
  collating samples."""

  log.debug('Commencing resource monitoring for task "%s"' % self._task_id)
  next_process_collection = 0
  next_disk_collection = 0

  while not self._kill_signal.is_set():
    now = time.time()

    if now > next_process_collection:
      next_process_collection = now + self._process_collection_interval
      actives = set(self._get_active_processes())
      current = set(self._process_collectors)
      for process in current - actives:
        log.debug('Process "%s" (pid %s) no longer active, removing from monitored processes' %
                  (process.process, process.pid))
        self._process_collectors.pop(process)
      for process in actives - current:
        log.debug('Adding process "%s" (pid %s) to resource monitoring' %
                  (process.process, process.pid))
        self._process_collectors[process] = self._process_collector_factory(process.pid)
      for process, collector in self._process_collectors.items():
        log.debug('Collecting sample for process "%s" (pid %s) and children' %
                  (process.process, process.pid))
        collector.sample()

    if now > next_disk_collection:
      next_disk_collection = now + self._disk_collection_interval
      log.debug('Collecting disk sample for %s' % self._sandbox)
      self._disk_collector.sample()

    try:
      aggregated_procs = sum(map(attrgetter('procs'), self._process_collectors.values()))
      aggregated_sample = sum(map(attrgetter('value'), self._process_collectors.values()),
                              ProcessSample.empty())
      self._history.add(now, self.ResourceResult(aggregated_procs, aggregated_sample,
                                                 self._disk_collector.value))
      log.debug("Recorded resource sample at %s" % now)
    except ValueError as err:
      log.warning("Error recording resource sample: %s" % err)

    # Sleep until any of the following conditions are met:
    # - it's time for the next disk collection
    # - it's time for the next process collection
    # - the result from the last disk collection is available via the DiskCollector
    # - the TaskResourceMonitor has been killed via self._kill_signal
    now = time.time()
    next_collection = min(next_process_collection - now, next_disk_collection - now)
    EventMuxer(self._kill_signal, self._disk_collector.completed_event
               ).wait(timeout=max(0, next_collection))

  log.debug('Stopping resource monitoring for task "%s"' % self._task_id)
def _reconnect(self):
  """Reconnect to ZK and update endpoints once complete."""
  for _ in range(self._retries):
    try:
      self._zk.restart()
      self._start()
      break
    except ZooKeeper.ConnectionTimeout:
      log.warning('Connection establishment to %r timed out, retrying.' % self._zk)
  else:
    raise ServerSetClient.ReconnectFailed('Re-establishment of connection to ZK servers failed')
def _reconnect(self):
  """Reconnect to ZK and update endpoints once complete."""
  for _ in range(self._retries):
    try:
      self._zk.restart()
      self._start()
      break
    except ZooKeeper.ConnectionTimeout:
      log.warning("Connection establishment to %r timed out, retrying." % self._zk)
  else:
    raise ServerSetClient.ReconnectFailed("Re-establishment of connection to ZK servers failed")
def _complete_maintenance(self, drained_hosts):
  """End the maintenance status for a given set of hosts.

  :param drained_hosts: Hosts that are drained and finished being operated upon
  :type drained_hosts: gen.apache.aurora.ttypes.Hosts
  """
  check_and_log_response(self._client.end_maintenance(drained_hosts))
  resp = self._client.maintenance_status(drained_hosts)
  for host_status in resp.result.maintenanceStatusResult.statuses:
    if host_status.mode != MaintenanceMode.NONE:
      log.warning('%s is DRAINING or in DRAINED' % host_status.host)
def _maybe_update_failure_count(self, is_healthy, reason):
  if not is_healthy:
    log.warning('Health check failure: %s' % reason)
    self.current_consecutive_failures += 1
    if self.current_consecutive_failures > self.max_consecutive_failures:
      log.warning('Reached consecutive failure limit.')
      self.healthy = False
      self.reason = reason
  else:
    if self.current_consecutive_failures > 0:
      log.debug('Reset consecutive failures counter.')
    self.current_consecutive_failures = 0
def resolve_ports(mesos_task, portmap):
  """Given a MesosTaskInstance and the portmap of resolved ports from the scheduler,
     create a fully resolved map of port name => port number for the thermos runner
     and discovery manager."""
  task_portmap = mesos_task.announce().portmap().get() if mesos_task.has_announce() else {}
  task_portmap.update(portmap)
  task_portmap = PortResolver.resolve(task_portmap)

  for name, port in task_portmap.items():
    if not isinstance(port, int):
      log.warning('Task has unmapped port: %s => %s' % (name, port))

  return dict((name, port) for (name, port) in task_portmap.items() if isinstance(port, int))
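# Hedged illustration (hypothetical port names and numbers) of the final step of
# resolve_ports(): after the announce portmap is merged with the scheduler-assigned ports and
# run through the resolver, entries that resolved to integers are kept and anything still
# symbolic is dropped (after the warning in the code above).
task_portmap = {'http': 31337, 'health': 31338, 'custom': 'unbound'}

resolved = dict((name, port) for (name, port) in task_portmap.items() if isinstance(port, int))
assert resolved == {'http': 31337, 'health': 31338}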
def get_completion(_, rc, children):
  if rc in self._zk.COMPLETION_RETRY:
    do_monitor()
    return
  if rc == zookeeper.NONODE:
    wait_exists()
    return
  if rc != zookeeper.OK:
    log.warning('Unexpected get_completion return code: %s' % ReturnCode(rc))
    capture.set(set([Membership.error()]))
    return
  self._update_children(children)
  set_different(capture, membership, self._members)
def _kill(self):
  processes = TaskRunnerHelper.scan_tree(self._state)
  for process, pid_tuple in processes.items():
    current_run = self._current_process_run(process)
    coordinator_pid, pid, tree = pid_tuple
    if TaskRunnerHelper.is_process_terminal(current_run.state):
      if coordinator_pid or pid or tree:
        log.warning('Terminal process (%s) still has running pids:' % process)
        log.warning('  coordinator_pid: %s' % coordinator_pid)
        log.warning('  pid: %s' % pid)
        log.warning('  tree: %s' % tree)
      TaskRunnerHelper.kill_process(self.state, process)
    else:
      if coordinator_pid or pid or tree:
        log.info('Transitioning %s to KILLED' % process)
        self._set_process_status(process, ProcessState.KILLED,
            stop_time=self._clock.time(), return_code=-1)
      else:
        log.info('Transitioning %s to LOST' % process)
        if current_run.state != ProcessState.WAITING:
          self._set_process_status(process, ProcessState.LOST)
def _do_health_check(self):
  if self._should_enforce_deadline():
    # This is needed otherwise it is possible to flap between
    # successful health-checks and failed health-checks, never
    # really satisfying the criteria for either healthy or unhealthy.
    log.warning('Exhausted attempts before satisfying liveness criteria.')
    self.healthy = False
    self.reason = 'Not enough successful health checks in time.'
    return self.healthy, self.reason

  is_healthy, reason = self._perform_check_if_not_disabled()
  if self.attempts <= self.max_attempts_to_running:
    self.attempts += 1
  self._maybe_update_health_check_count(is_healthy, reason)
  return is_healthy, reason
def adelete_completion(result):
  try:
    success = result.get()
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, do_cancel)
    return
  except ke.NoNodeError:
    success = True
  except ke.KazooException as e:
    log.warning('Unexpected Kazoo result in cancel: (%s)%s' % (type(e), e))
    success = False
  future = self._members.pop(member.id, Future())
  future.set_result(Membership.error())
  capture.set(success)
def get_completion(result):
  try:
    children = result.get()
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, do_monitor)
    return
  except ke.NoNodeError:
    wait_exists()
    return
  except ke.KazooException as e:
    log.warning('Unexpected get_completion result: (%s)%s' % (type(e), e))
    capture.set(set([Membership.error()]))
    return
  self._update_children(children)
  set_different(capture, membership, self._members)
def exists_completion(result):
  try:
    stat = result.get()
  except self.DISCONNECT_EXCEPTIONS:
    self._once(KazooState.CONNECTED, wait_exists)
    return
  except ke.NoNodeError:
    wait_exists()
    return
  except ke.KazooException as e:
    log.warning('Unexpected exists_completion result: (%s)%s' % (type(e), e))
    return
  if stat:
    do_monitor()
def method_wrapper(*args):
  with self._lock:
    start = time.time()
    while not self._terminating.is_set() and (
        time.time() - start) < self.RPC_MAXIMUM_WAIT.as_(Time.SECONDS):

      # Only automatically append a SessionKey if this is not part of the read-only API.
      auth_args = () if hasattr(ReadOnlyScheduler.Iface, method_name) else (self.session_key(),)
      try:
        method = getattr(self.client(), method_name)
        if not callable(method):
          return method

        resp = method(*(args + auth_args))
        if resp is not None and resp.responseCode == ResponseCode.ERROR_TRANSIENT:
          raise self.TransientError(", ".join(
              [m.message for m in resp.details] if resp.details else []))
        if resp.serverInfo.thriftAPIVersion != THRIFT_API_VERSION:
          raise self.APIVersionError("Client Version: %s, Server Version: %s" %
                                     (THRIFT_API_VERSION, resp.serverInfo.thriftAPIVersion))
        return resp
      except (TTransport.TTransportException, self.TimeoutError, self.TransientError) as e:
        if not self._terminating.is_set():
          log.warning('Connection error with scheduler: %s, reconnecting...' % e)
          self.invalidate()
          self._terminating.wait(self.RPC_RETRY_INTERVAL.as_(Time.SECONDS))
      except Exception as e:
        # Take any error that occurs during the RPC call, and transform it
        # into something clients can handle.
        if not self._terminating.is_set():
          raise self.ThriftInternalError("Error during thrift call %s to %s: %s" % (
              method_name, self.cluster.name, e))

    if not self._terminating.is_set():
      raise self.TimeoutError('Timed out attempting to issue %s to %s' % (
          method_name, self.cluster.name))
def process_to_sample(process):
  """ Given a psutil.Process, return a current ProcessSample """
  try:
    # the nonblocking cpu_percent call is stateful on a particular Process object, and hence
    # >2 consecutive calls are required before it will return a non-zero value
    rate = process.cpu_percent(0.0) / 100.0
    cpu_times = process.cpu_times()
    user, system = cpu_times.user, cpu_times.system
    memory_info = process.memory_info()
    rss, vms = memory_info.rss, memory_info.vms
    nice = process.nice()
    status = process.status()
    threads = process.num_threads()
    return ProcessSample(rate, user, system, rss, vms, nice, status, threads)
  except (AccessDenied, NoSuchProcess) as e:
    log.warning('Error during process sampling [pid=%s]: %s' % (process.pid, e))
    return ProcessSample.empty()
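# Sketch of the cpu_percent statefulness the comment above describes (assumes psutil is
# installed; not part of the original source): the first non-blocking call on a fresh Process
# object has no prior reading to diff against and reports 0.0; subsequent calls return CPU
# utilization since the previous call.
import time

import psutil

proc = psutil.Process()             # the current process
print(proc.cpu_percent(0.0))        # first call: 0.0, no baseline yet
sum(i * i for i in range(10 ** 6))  # burn a little CPU
time.sleep(0.1)
print(proc.cpu_percent(0.0))        # now reflects CPU used since the previous call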
def reap_children(cls):
  pids = set()

  while True:
    try:
      pid, status, rusage = os.wait3(os.WNOHANG)
      if pid == 0:
        break
      pids.add(pid)
      log.debug('Detected terminated process: pid=%s, status=%s, rusage=%s' % (
          pid, status, rusage))
    except OSError as e:
      if e.errno != errno.ECHILD:
        log.warning('Unexpected error when calling waitpid: %s' % e)
      break

  return pids