def rpc(self, method, callback=None, acceptable_replies=None):
    """Shortcut wrapper to the Connection's rpc command using its
    callback stack, passing in our channel number.
    """
    # Use a fresh list per call; a mutable default argument would be shared
    acceptable_replies = acceptable_replies or []

    # Make sure the channel is open
    self._ensure()

    # If we're blocking, add subsequent commands to our stack
    if self.blocking:
        log.debug('%s: %s is blocking this channel',
                  self.__class__.__name__, self.blocking)
        self._blocked.append([method, callback, acceptable_replies])
        return

    # If this is a synchronous method, block connections until we're done
    if method.synchronous:
        log.debug('%s: %s turning on blocking',
                  self.__class__.__name__, method.NAME)
        self.blocking = method.NAME

        for reply in acceptable_replies:
            self.callbacks.add(self.channel_number, reply,
                               self._on_synchronous_complete)
            if callback:
                self.callbacks.add(self.channel_number, reply, callback)

    self.send_method(method)
def _close_connection(self):
    """Close the connection with the AMQP Connection-Forced value."""
    duration = self._missed * self._interval
    log.debug('heartbeat: %s - close connection; stale for %s seconds',
              self, duration)
    self._connection.close(HeartbeatChecker._CONNECTION_FORCED,
                           HeartbeatChecker._STALE_CONNECTION % duration)
def process(self, prefix, key, caller, *args, **keywords):
    """Run through and process all the callbacks for the specified keys.
    Caller should be specified at all times so that callbacks which
    require a specific function to call CallbackManager.process will
    not be processed.
    """
    # Use the sanitized string form, since the key could be a class/object
    key = self.sanitize(key)

    # Make sure we have a callback for this event
    if prefix not in self._callbacks or key not in self._callbacks[prefix]:
        return None

    callbacks = list()
    one_shot_remove = list()

    # Loop through callbacks that want all prefixes and what we asked for
    for callback in self._callbacks[prefix][key]:
        if 'only' not in callback or callback['only'] == caller.__class__:
            callbacks.append(callback['handle'])

            # If it's a one shot callback, add it to a list for removal
            if callback['one_shot']:
                one_shot_remove.append([prefix, key, callback])

    # Remove the one shot callbacks before invoking anything, so that a
    # callback which re-enters process for the same key cannot recurse
    for prefix, key, callback in one_shot_remove:
        self.remove(prefix, key, callback)

    for callback in callbacks:
        log.debug('CallbackManager: Calling %s for "%s:%s"',
                  callback, prefix, key)
        callback(*args, **keywords)
def send_and_check(self):
    """Invoked by a timer to send a heartbeat when we need to, check to
    see if we've missed any heartbeats and disconnect our connection if
    it's been idle too long.
    """
    log.debug('send_and_check: heartbeat: %s - old bytes sent: %s '
              'and received: %s', self, self._sent, self._received)

    # If too many heartbeats have been missed, close & reset the connection
    if self._too_many_missed_heartbeats():
        log.debug('heartbeat: %s - missed too many heartbeats; '
                  'close connection', self)
        self._close_connection()
        return

    # If there have been no bytes received since the last check
    if self._should_send_heartbeat_frame():
        log.debug('heartbeat: %s - send heartbeat frame', self)
        self._send_heartbeat_frame()
    else:
        log.debug('heartbeat: %s - no heartbeat to send', self)

    # Update the byte counts for the next check
    self._update_byte_counts()
    log.debug('heartbeat: %s - updated byte counts: sent: %s; received: %s',
              self, self._sent, self._received)

    # Update the timer to fire again
    self._start_timer()
def derive(self, name, mtype, value, ts=None):
    ts_now = time()
    cache = ft.partial(self._cache, 'counters', ts=ts_now)
    ts = ts or ts_now

    if mtype == 'counter':
        try:
            v0, ts0 = cache(name)
        except KeyError:
            self.log.debug('Initializing bucket for new counter: {}'.format(name))
            cache(name, (value, ts))
            raise self.IgnoreValue(name)
        delta = float(value - v0) / (ts - ts0)
        # Cache the raw counter sample as the baseline for the next delta
        cache(name, (value, ts))
        if delta < 0:
            self.log.debug(
                'Detected counter overflow'
                ' (negative delta): {}, {} -> {}'.format(name, v0, value))
            raise self.IgnoreValue(name)
        value = delta

    elif mtype == 'gauge':
        pass  # gauge values are passed through unchanged

    elif mtype == 'timestamp':
        try:
            ts_prev = cache(name)
        except KeyError:
            ts_prev = None
        if ts_prev == value:
            self.log.debug(
                'Ignoring duplicate'
                ' timestamp value for {}: {}'.format(name, value))
            raise self.IgnoreValue(name)
        value, ts = 1, cache(name, value)

    else:
        raise TypeError('Unknown type: {}'.format(mtype))

    return value, ts
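# Worked example (standalone, hypothetical numbers) of the 'counter' branch
# above: two samples of a monotonic counter become a per-second rate, and a
# negative delta (counter wrap/reset) is treated as an overflow and dropped.
def counter_to_rate(v0, ts0, v1, ts1):
    """Per-second rate between two counter samples, or None on overflow."""
    rate = float(v1 - v0) / (ts1 - ts0)
    return None if rate < 0 else rate

assert counter_to_rate(100, 0, 160, 60) == 1.0  # 60 units over 60s -> 1/s
assert counter_to_rate(2 ** 32 - 10, 0, 5, 60) is None  # wrapped counter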
def _handle_error(self, error):
    """Internal error handling method. Here we expect a socket.error
    coming in and will handle different socket errors differently.
    """
    # Handle version differences in Python
    if hasattr(error, 'errno'):  # Python >= 2.6
        error_code = error.errno
    elif error:
        error_code = error[0]  # Python <= 2.5

    # Ok errors, just continue what we were doing before
    if error_code in ERRORS_TO_IGNORE:
        log.debug("Ignoring %s", error_code)
        return None

    # Socket is closed, so lets just go to our handle_close method
    elif error_code == errno.EBADF:
        log.error("%s: Socket is closed", self.__class__.__name__)
    elif self.parameters.ssl and isinstance(error, ssl.SSLError):
        log.error(repr(error))
    else:
        # Haven't run into this one yet, log it.
        log.error("%s: Socket Error on fd %d: %s",
                  self.__class__.__name__,
                  self.socket.fileno(), error_code)

    # Disconnect from our IOLoop and let Connection know what's up
    self._handle_disconnect()
def _setup_timer(self):
    """Use the connection object's add_timeout method, implemented by the
    Adapter, to schedule send_and_check every interval seconds.
    """
    log.debug('heartbeat: %s - add heartbeat timeout', self)
    self._connection.add_timeout(self._interval, self.send_and_check)
def poll(self):
    # Poll until TIMEOUT waiting for an event
    events = self._poll.poll(int(SelectPoller.TIMEOUT * 1000))

    # If we didn't timeout, pass the event to the handler
    if events:
        log.debug("%s: Calling %s", self.__class__.__name__, self._handler)
        fileno, event = events[0]
        self._handler(fileno, event)
def _flush_outbound(self):
    try:
        self._handle_write()
        self._socket_timeouts = 0
    except socket.timeout:
        self._socket_timeouts += 1
        log.debug('_flush_outbound: write socket.timeout number: %s',
                  self._socket_timeouts)
        if self._socket_timeouts > SOCKET_TIMEOUT_THRESHOLD:
            log.error(SOCKET_TIMEOUT_MESSAGE)
            self._handle_disconnect()
def process_timeouts(self):
    """Process our self._timeouts event stack."""
    # Snapshot the keys, since firing a handler may mutate self._timeouts
    keys = list(self._timeouts.keys())
    start_time = time.time()
    for timeout_id in keys:
        if timeout_id in self._timeouts and \
           self._timeouts[timeout_id]['deadline'] <= start_time:
            log.debug('%s: Timeout calling %s', self.__class__.__name__,
                      self._timeouts[timeout_id]['handler'])
            self._timeouts.pop(timeout_id)['handler']()
def add_timeout(self, deadline, callback):
    """Add a timeout to the stack; deadline is an offset in seconds
    from now.
    """
    t = time.time()
    timeout_id = '%.8f' % t
    deadline_at = t + deadline
    log.debug('add timeout: time: %s; deadline: %s; and callback: %s',
              t, deadline_at, callback)
    self._timeouts[timeout_id] = {'deadline': deadline_at,
                                  'handler': callback}
    return timeout_id
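# Standalone sketch (not the adapter API) of how add_timeout and
# process_timeouts above cooperate: timeouts are stored by id with an
# absolute deadline and are popped-then-fired once the deadline passes;
# iterating over a snapshot of the keys keeps handler-triggered mutation
# of the dict safe.
import time

_timeouts = {}

def add_timeout(offset, callback):
    timeout_id = '%.8f' % time.time()
    _timeouts[timeout_id] = {'deadline': time.time() + offset,
                             'handler': callback}
    return timeout_id

def process_timeouts():
    now = time.time()
    for timeout_id in list(_timeouts.keys()):
        if _timeouts[timeout_id]['deadline'] <= now:
            _timeouts.pop(timeout_id)['handler']()

add_timeout(0, lambda: None)  # fires on the next process_timeouts() pass
process_timeouts()
assert not _timeouts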
def _do_ssl_handshake(self):
    """Copied from python stdlib test_ssl.py."""
    log.debug("_do_ssl_handshake")
    try:
        self.socket.do_handshake()
    except ssl.SSLError as err:
        if err.args[0] in (ssl.SSL_ERROR_WANT_READ,
                           ssl.SSL_ERROR_WANT_WRITE):
            # The handshake will be retried on the next read/write event
            return
        elif err.args[0] == ssl.SSL_ERROR_EOF:
            return self._handle_disconnect()
        raise
def _missed_heartbeat_responses(self):
    """Check to see if the received byte count matches the connection
    object received byte count. If the bytes are equal, there has not
    been a heartbeat sent since the last check.
    """
    log.debug('heartbeat: %s - check missed heartbeat: old received: %s, '
              'received: %s',
              self, self._received, self._connection_bytes_received())
    if self._received == self._connection_bytes_received():
        self._missed += 1
    else:
        # The server has said something. Reset our count.
        self._missed = 0

    # Return the missed heartbeat response count
    return self._missed
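# Minimal standalone model (hypothetical names, not pika's HeartbeatChecker)
# of the staleness bookkeeping above: a connection counts as stale once the
# received byte count has not moved for more than MAX_MISSED checks in a row.
class BytesStalenessCheck(object):
    MAX_MISSED = 2

    def __init__(self):
        self._received = 0
        self._missed = 0

    def check(self, bytes_received_now):
        """Return True once the peer has been silent for too many checks."""
        if bytes_received_now == self._received:
            self._missed += 1
        else:
            self._missed = 0  # the peer said something; reset the count
        self._received = bytes_received_now
        return self._missed > self.MAX_MISSED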
def _handle_error(self, error):
    """Internal error handling method. Here we expect a socket.error
    coming in and will handle different socket errors differently.
    """
    # Handle version differences in Python
    if hasattr(error, 'errno'):  # Python >= 2.6
        error_code = error.errno
    elif error is not None:
        error_code = error[0]  # Python <= 2.5
    else:
        # This shouldn't happen, but log it in case it does
        log.error("%s: Tried to handle an error where no error existed",
                  self.__class__.__name__)
        # Cannot continue as error_code is not set
        return

    # Ok errors, just continue what we were doing before
    if error_code in ERRORS_TO_IGNORE:
        log.debug("Ignoring %s", error_code)
        return None

    # Socket is closed, so lets just go to our handle_close method
    elif error_code in (errno.EBADF, errno.ECONNABORTED):
        log.error("%s: Socket is closed", self.__class__.__name__)

    elif self.parameters.ssl and isinstance(error, ssl.SSLError):
        # SSL socket operation needs to be retried
        if error_code in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE):
            return None
        else:
            log.error("%s: SSL Socket error on fd %d: %s",
                      self.__class__.__name__,
                      self.socket.fileno(), repr(error))
    else:
        # Haven't run into this one yet, log it.
        log.error("%s: Socket Error on fd %d: %s",
                  self.__class__.__name__,
                  self.socket.fileno(), error_code)

    # Disconnect from our IOLoop and let Connection know what's up
    self._handle_disconnect()
    return None
def __init__(self, connection, interval):
    """Create a heartbeat on connection sending a heartbeat frame every
    interval seconds.

    :param pika.connection.Connection connection: Connection object
    :param int interval: Heartbeat check interval
    """
    # We need to reference our connection object to close a connection
    self._connection = connection
    self._interval = interval

    # Initialize counters
    self._missed = 0
    self._received = 0
    self._sent = 0

    log.debug('heartbeat: %s - with interval: %s', self, self._interval)

    # Setup the timer to fire in _interval seconds
    self._setup_timer()
def process_data_events(self):
    # Make sure we're open, if not raise the exception
    if not self.is_open and not self.is_closing:
        raise AMQPConnectionError

    # Read data
    try:
        self._handle_read()
        self._socket_timeouts = 0
    except socket.timeout:
        self._socket_timeouts += 1
        log.debug('process_data_events: read socket.timeout number: %s',
                  self._socket_timeouts)
        if self._socket_timeouts > SOCKET_TIMEOUT_THRESHOLD:
            log.error(SOCKET_TIMEOUT_MESSAGE)
            self._handle_disconnect()

    # Process our timeout events
    self.process_timeouts()

    # Write our data
    self._flush_outbound()
def add(self, prefix, key, callback, one_shot=True, only_caller=None):
    """Add a callback to the stack for the specified key. If the call is
    specified as one_shot, it will be removed after being fired.

    The prefix is usually the channel number, but the class is generic
    and prefix and key may be any value. If you pass in only_caller,
    CallbackManager will restrict processing of the callback to only
    the calling function/object that you specify.
    """
    # Use the sanitized string form, since the key could be a class/object
    key = self.sanitize(key)

    # Make sure we've seen the prefix before
    if prefix not in self._callbacks:
        self._callbacks[prefix] = dict()

    # If we don't have the key in our callbacks, add it
    if key not in self._callbacks[prefix]:
        self._callbacks[prefix][key] = list()

    # Our callback info we need elsewhere in the class
    callback_dict = {'handle': callback, 'one_shot': one_shot}
    if only_caller:
        callback_dict['only'] = only_caller

    # Keep from adding the same callback a second time
    if callback_dict in self._callbacks[prefix][key]:
        log.warning('%s.add: Duplicate callback found for "%s:%s"',
                    self.__class__.__name__, prefix, key)
        return

    # Append the callback to our key list
    self._callbacks[prefix][key].append(callback_dict)
    log.debug('%s: Added "%s:%s" with callback: %s',
              self.__class__.__name__, prefix, key, callback)
    return prefix, key
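# Simplified standalone model (hypothetical MiniCallbacks class, not pika's
# CallbackManager) of the registry that add() and process() above maintain:
# callbacks live in a two-level {prefix: {key: [...]}} dict, and one-shot
# entries are discarded before invocation so a callback that re-enters
# process() cannot fire itself again.
class MiniCallbacks(object):
    def __init__(self):
        self._callbacks = {}

    def add(self, prefix, key, handler, one_shot=True):
        stack = self._callbacks.setdefault(prefix, {}).setdefault(key, [])
        stack.append({'handle': handler, 'one_shot': one_shot})

    def process(self, prefix, key, *args):
        stack = self._callbacks.get(prefix, {}).get(key, [])
        to_call = list(stack)
        stack[:] = [cb for cb in stack if not cb['one_shot']]
        for cb in to_call:
            cb['handle'](*args)

hits = []
mgr = MiniCallbacks()
mgr.add(1, 'Basic.Ack', hits.append)  # one_shot by default
mgr.process(1, 'Basic.Ack', 'first')
mgr.process(1, 'Basic.Ack', 'second')  # one-shot: not called again
assert hits == ['first']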
def handler(body_frame):
    # Make sure it's a body frame
    if not isinstance(body_frame, Body):
        raise exceptions.UnexpectedFrameError(body_frame)

    # Increment our counter so we know when we've had enough
    seen_so_far[0] += len(body_frame.fragment)

    # Append the fragment to our list
    body_fragments.append(body_frame.fragment)

    # Did we get enough bytes? If so finish
    if seen_so_far[0] == header_frame.body_size:
        finish()
    elif seen_so_far[0] < header_frame.body_size:
        log.debug('Received message Body frame, %i of %i bytes of '
                  'message body received.',
                  seen_so_far[0], header_frame.body_size)
    # Did we get too many bytes?
    elif seen_so_far[0] > header_frame.body_size:
        error = ('Received %i and only expected %i' %
                 (seen_so_far[0], header_frame.body_size))
        raise exceptions.BodyTooLongError(error)
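# Tiny illustration (standalone, hypothetical sizes) of the seen_so_far[0]
# idiom above: a one-element list lets the nested handler mutate a counter
# in the enclosing scope, the pre-"nonlocal" Python 2 workaround.
seen_so_far, body_size = [0], 10
for fragment in ('hello', 'world'):
    seen_so_far[0] += len(fragment)
assert seen_so_far[0] == body_size  # message body complete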
""" if self.parameters.ssl and self._ssl_connecting: return self._do_ssl_handshake() try: if self.parameters.ssl and self.socket.pending(): data = self.socket.read(self._suggested_buffer_size) else: data = self.socket.recv(self._suggested_buffer_size) except socket.timeout: raise except socket.error, error: return self._handle_error(error) # We received no data, so disconnect if not data: log.debug('Calling disconnect') return self._adapter_disconnect() # Pass the data into our top level frame dispatching method self._on_data_available(data) def _handle_write(self): """ We only get here when we have data to write, so try and send Pika's suggested buffer size of data (be nice to Windows). """ if self.parameters.ssl and self._ssl_connecting: return self._do_ssl_handshake() if not self.write_buffer: self.write_buffer = \
def __init__(self, parameters=None, reconnection_strategy=None):
    BaseConnection.__init__(self, parameters, None, reconnection_strategy)
    for h in log.logger.handlers:
        log.debug('pika handlers: %s', h)
def remove(self, prefix, key, callback=None):
    """Remove a callback from the stack by prefix, key and optionally
    the callback itself. If you only pass in prefix and key, all
    callbacks for that prefix and key will be removed.
    """
    # Use the sanitized string form, since the key could be a class/object
    key = self.sanitize(key)

    if prefix in self._callbacks and key in self._callbacks[prefix]:
        if callback:
            # Remove the callback from the _callbacks dict
            if callback in self._callbacks[prefix][key]:
                self._callbacks[prefix][key].remove(callback)
                log.debug('%s: Removed %s for "%s:%s"',
                          self.__class__.__name__, callback, prefix, key)

            # Remove the list from the dict if it's empty
            if not self._callbacks[prefix][key]:
                del(self._callbacks[prefix][key])
                log.debug('%s: Removed empty key "%s:%s"',
                          self.__class__.__name__, prefix, key)

                # Remove the prefix if it's empty
                if not self._callbacks[prefix]:
                    del(self._callbacks[prefix])
                    log.debug('%s: Removed empty prefix "%s"',
                              self.__class__.__name__, prefix)
            return True
        else:
            # No callback given: remove all callbacks for the key
            del(self._callbacks[prefix][key])
            log.debug('%s: Removed all callbacks for "%s:%s"',
                      self.__class__.__name__, prefix, key)

            # Remove the prefix if it's empty
            if not self._callbacks[prefix]:
                del(self._callbacks[prefix])
                log.debug('%s: Removed empty prefix "%s"',
                          self.__class__.__name__, prefix)
            return True

    # Prefix, Key or Callback could not be found
    return False
def decode_frame(data_in):
    """Receives raw socket data and attempts to turn it into a frame.
    Returns the number of bytes consumed and the frame.
    """
    # Look to see if it's a protocol header frame
    try:
        if data_in[0:4] == 'AMQP':
            major, minor, revision = struct.unpack_from('BBB', data_in, 5)
            return 8, ProtocolHeader(major, minor, revision)
    except (IndexError, struct.error):
        # We didn't get a full frame
        return 0, None

    # Get the Frame Type, Channel Number and Frame Size
    try:
        frame_type, channel_number, frame_size = \
            struct.unpack('>BHL', data_in[0:7])
    except struct.error:
        # We didn't get a full frame
        return 0, None

    # Get the frame data
    frame_end = spec.FRAME_HEADER_SIZE + frame_size + spec.FRAME_END_SIZE

    # We don't have all of the frame yet
    if frame_end > len(data_in):
        return 0, None

    # The frame termination character is wrong
    if data_in[frame_end - 1] != chr(spec.FRAME_END):
        raise exceptions.InvalidFrameError("Invalid FRAME_END marker")

    # Get the raw frame data
    frame_data = data_in[spec.FRAME_HEADER_SIZE:frame_end - 1]

    if frame_type == spec.FRAME_METHOD:
        # Get the Method ID from the frame data
        method_id = struct.unpack_from('>I', frame_data)[0]

        # Get a Method object for this method_id
        method = spec.methods[method_id]()

        # Decode the content
        method.decode(frame_data, 4)

        # Return the amount of data consumed and the Method object
        return frame_end, Method(channel_number, method)

    elif frame_type == spec.FRAME_HEADER:
        # Return the header class and body size
        class_id, weight, body_size = struct.unpack_from('>HHQ', frame_data)

        # Get and decode the Properties type for this class
        properties = spec.props[class_id]()
        properties.decode(frame_data[12:])

        # Return a Header frame
        return frame_end, Header(channel_number, body_size, properties)

    elif frame_type == spec.FRAME_BODY:
        # Return the amount of data consumed and the Body frame w/ data
        return frame_end, Body(channel_number, frame_data)

    elif frame_type == spec.FRAME_HEARTBEAT:
        # Return the amount of data and a Heartbeat frame
        return frame_end, Heartbeat()

    raise exceptions.InvalidFrameError("Unknown frame type: %i" % frame_type)
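# Usage sketch for decode_frame() above: it returns (bytes_consumed, frame),
# with (0, None) meaning "not a complete frame yet", so the caller keeps a
# byte buffer and slices off each decoded frame. dispatch is a hypothetical
# per-frame callback.
def consume_buffer(buffer_in, dispatch):
    """Decode as many complete frames as possible; return leftover bytes."""
    while buffer_in:
        consumed, frame = decode_frame(buffer_in)
        if not consumed:  # partial frame - wait for more socket data
            break
        buffer_in = buffer_in[consumed:]
        dispatch(frame)
    return buffer_in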
def main():
    global graphite_min_cycle  # can be updated

    import argparse
    parser = argparse.ArgumentParser(
        description='Collect various metrics from gmond and dispatch'
            ' them graphite-style at regular intervals to amqp'
            ' (so they can be routed to carbon).')
    parser.add_argument('-c', '--config', action='append', default=list(),
        help='Additional configuration files to read. Can be specified'
            ' multiple times, values from later ones override values in the former.')
    parser.add_argument('-n', '--dry-run', action='store_true',
        help='Do not actually send data.')
    parser.add_argument('--dump', action='store_true',
        help='Dump polled data to stdout.')
    parser.add_argument('--debug', action='store_true',
        help='Verbose operation mode.')
    optz = parser.parse_args()

    cfg = AttrDict.from_yaml('{}.yaml'.format(
        os.path.splitext(os.path.realpath(__file__))[0]), if_exists=True)
    for k in optz.config:
        cfg.update_yaml(k)

    configure_logging(cfg.logging,
        logging.DEBUG if optz.debug else logging.WARNING)
    logging.captureWarnings(cfg.logging.warnings)

    optz.dump = optz.dump or cfg.debug.dump_data
    optz.dry_run = optz.dry_run or cfg.debug.dry_run
    graphite_min_cycle = cfg.metrics.interval

    mangler = DataMangler(
        name_template=cfg.metrics.name.full,
        name_rewrite=cfg.metrics.name.rewrite,
        name_aliases=cfg.metrics.name.aliases)

    log = logging.getLogger('gmond_amqp.amqp_link')
    if not cfg.logging.tracebacks:
        log.exception = log.error
    amqp = AMQPPublisher(
        host=cfg.net.amqp.host,
        auth=(cfg.net.amqp.user, cfg.net.amqp.password),
        exchange=cfg.net.amqp.exchange,
        heartbeat=cfg.net.amqp.heartbeat,
        log=log,
        libc_gethostbyname=gethostbyname
            if not cfg.net.bypass_libc_gethostbyname else False)

    log = logging.getLogger('gmond_amqp.main_loop')
    ts, data = time(), list()
    self_profiling = cfg.metrics.self_profiling and '{}.gmond_amqp'.format(
        socket.gethostname()
        if cfg.net.bypass_libc_gethostbyname else gethostname())

    while True:
        ts_now = time()

        xml = gmond_poll(cfg.net.gmond.hosts,
            libc_gethostbyname=gethostbyname
                if not cfg.net.bypass_libc_gethostbyname else False,
            default_port=cfg.net.gmond.default_port)
        if self_profiling:
            ts_new, ts_prof = time(), ts_now
            val, ts_prof = ts_new - ts_prof, ts_new
            data.append(('{}.poll'.format(self_profiling), ts_now, val, val))

        xml = gmond_xml_process(xml,
            validate=cfg.net.gmond.validate_xml,
            validate_strict=cfg.net.gmond.validate_strict)
        if self_profiling:
            ts_new = time()
            val, ts_prof = ts_new - ts_prof, ts_new
            data.append(('{}.process'.format(self_profiling), ts_now, val, val))

        data.extend(it.chain.from_iterable(it.starmap(
            ft.partial(mangler.process_cluster, ts=ts_now), xml)))

        log.debug('Publishing {} datapoints'.format(len(data)))
        if optz.dump:
            pprint(data)
        if not optz.dry_run:
            amqp.publish(data)

        if self_profiling:
            ts_new = time()
            val, ts_prof = ts_new - ts_prof, ts_new
            data = [('{}.publish'.format(self_profiling), ts_now, val, val)]
        else:
            data = list()  # start the next cycle with an empty batch

        while ts <= ts_now:
            ts += cfg.metrics.interval
        ts_sleep = max(0, ts - time())
        log.debug('Sleep: {}s'.format(ts_sleep))
        sleep(ts_sleep)
def gmond_poll(sources,
               timeout=graphite_min_cycle, to_escalate=None, to_break=None,
               src_escalate=[1, 1, 2.0], default_port=8649,
               libc_gethostbyname=gethostbyname):
    '''XML with values is fetched from possibly-multiple sources,
        first full dump received is returned.

    sources: iterable of sources to query -
        either hostname/ip or tuple of (hostname/ip, port)
    src_escalate: number of sources to query simultaneously in the beginning,
        with more added after each to_escalate pass;
        int - how many sources to query after each to_escalate passes,
        float (0-1.0) - percentage of sources,
        or iterable of ints/floats - value to use for each step,
        last one being used for the rest
    to_escalate: timeout before starting to query additional sources;
        int/float or iterable of these
        ([1,2,3] would mean "wait 1s, then 2s, then 3s")
    to_break: int/float - timeout to stop waiting for data
        from one source (break connection)
    timeout: int/float - global timeout (not counting libc.gethostbyname
        for all sources, if used), also used to calculate sensible values
        for to_*, if none are specified'''
    log = logging.getLogger('gmond_amqp.poller')

    # Without this, gevent does its own (albeit parallel) gethostbyname,
    # ignoring libc (ldap, nis, /etc/hosts), which is wrong.
    # The obvious downside is that it's serial - i.e. all hosts will be
    # resolved here and now, before any actual xml fetching takes place;
    # it can be delayed, but won't suck any less.
    if not libc_gethostbyname:
        libc_gethostbyname = lambda x: x
    sources = list(
        (libc_gethostbyname(src[0]),
         int(src[1]) if len(src) > 1 else default_port)
        for src in ((src.rsplit(':', 1)
            if isinstance(src, types.StringTypes) else src)
            for src in sources))

    # First calculate the number of escalation tiers, then pick proper intervals
    src_escalate = list(reversed(src_escalate
        if isinstance(src_escalate, Iterable) else [src_escalate]))
    src_slice, src_count = src_escalate.pop(), len(sources)
    src_tiers = list()
    while sources:
        src_tier, sources = sources[:src_slice], sources[src_slice:]
        src_tiers.append(src_tier)
        if src_escalate:
            src_slice = src_escalate.pop()
            if isinstance(src_slice, float):
                src_slice = int(src_count / src_slice)

    if to_escalate is None:
        to_escalate = [
            1,  # 1s should be enough for everyone!
            ((timeout - 1) / 2.0)
                / ((len(src_tiers) - 1) or 1)]  # so they'll fit in half-timeout
    if not isinstance(to_escalate, Iterable):
        to_escalate = [to_escalate]
    if to_break is None:
        to_break = timeout
    src_tiers = zip(it.chain(to_escalate, it.repeat(to_escalate[-1])), src_tiers)
    log.debug('Escalation tiers: {}'.format(src_tiers))

    def fetch_from_src(source):
        try:
            with Timeout(to_break),\
                    closing(socket.socket(
                        socket.AF_INET, socket.SOCK_STREAM)) as sock:
                log.debug('Fetching from source: {}'.format(source))
                sock.connect(source)
                buff = bytes()
                while True:
                    chunk = sock.recv(1 * 2 ** 20)
                    if not chunk:
                        return buff
                    buff += chunk
        except (Timeout, socket.error) as err:
            log.debug('Connection to source {} failed ({err})'.format(source, err=err))
            return DataPollError  # indicates failure

    src_tiers = list(reversed(src_tiers))
    queries, result, sentinel = Group(), Queue(), None
    try:
        with Timeout(timeout):
            while src_tiers:
                to, src_tier = src_tiers.pop()
                for src in src_tier:
                    src = queries.spawn(fetch_from_src, src)
                    src.link(result.put)
                    src.link_exception()
                if sentinel is None or sentinel.ready():
                    sentinel = gevent.spawn(queries.join)
                    sentinel.link(result.put)  # to break/escalate if they all died
                try:
                    with Timeout(to if src_tiers else None):
                        while True:
                            res = result.get(block=True).get(block=True, timeout=0)
                            if res is None:
                                raise Timeout
                            elif res is not DataPollError:
                                return res
                except Timeout:
                    pass
                if src_tiers:
                    log.debug('Escalating to the next tier: {}'.format(src_tiers[-1]))
                else:
                    raise Timeout
    except Timeout:
        raise DataPollError('No sources could be reached in time')
    finally:
        queries.kill(block=True)
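# Worked example (standalone, hypothetical host list) of the escalation
# slicing inside gmond_poll() above: src_escalate=[1, 1, 2] splits five
# sources into query tiers of growing size, the last step size being reused
# for the rest; later tiers are only contacted if earlier ones time out.
def build_tiers(sources, src_escalate):
    steps = list(reversed(src_escalate))
    src_slice, tiers = steps.pop(), []
    while sources:
        tier, sources = sources[:src_slice], sources[src_slice:]
        tiers.append(tier)
        if steps:
            src_slice = steps.pop()
    return tiers

hosts = ['a', 'b', 'c', 'd', 'e']
assert build_tiers(hosts, [1, 1, 2]) == [['a'], ['b'], ['c', 'd'], ['e']]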