def _http_retry_delay() -> Iterable[float]:
    """Build the sequence of delays to wait between HTTP retries.

    The schedule is produced by ``udp_utils.timeout_exponential_backoff``
    from module-level defaults: the number of retries before backing off,
    one fifth of the Matrix retry interval, and the full Matrix retry
    interval. Both interval values are truncated with ``int``.

    Returns:
        Whatever ``udp_utils.timeout_exponential_backoff`` returns for those
        three arguments.
    """
    # NOTE(review): the previous comment stated that these constants are
    # defined in raiden.app.App.DEFAULT_CONFIG -- confirm against that module.
    retries_before_backoff = DEFAULT_TRANSPORT_RETRIES_BEFORE_BACKOFF
    first_timeout = int(DEFAULT_TRANSPORT_MATRIX_RETRY_INTERVAL / 5)
    max_timeout = int(DEFAULT_TRANSPORT_MATRIX_RETRY_INTERVAL)

    return udp_utils.timeout_exponential_backoff(
        retries_before_backoff,
        first_timeout,
        max_timeout,
    )
def _http_retry_delay() -> Iterable[float]:
    """Build the sequence of delays to wait between HTTP retries.

    ``self`` is not a parameter: it is resolved from the enclosing scope.
    The schedule is produced by ``udp_utils.timeout_exponential_backoff``
    from ``self._config``: ``'retries_before_backoff'``, one fifth of
    ``'retry_interval'``, and ``'retry_interval'`` itself.

    Returns:
        Whatever ``udp_utils.timeout_exponential_backoff`` returns for those
        three arguments.
    """
    # NOTE(review): the previous comment stated that these values are defined
    # in raiden.app.App.DEFAULT_CONFIG -- confirm against that module.
    config = self._config
    retries_before_backoff = config['retries_before_backoff']
    first_timeout = config['retry_interval'] / 5
    max_timeout = config['retry_interval']

    return udp_utils.timeout_exponential_backoff(
        retries_before_backoff,
        first_timeout,
        max_timeout,
    )
def retry():
    """Resend ``data`` until ``async_result`` holds a value.

    ``self``, ``async_result``, ``receiver_address`` and ``data`` are free
    variables resolved from the enclosing scope. Timeouts come from the
    ``'protocol'`` section of the raiden service config; the maximum timeout
    is ten times the retry interval.
    """
    protocol_config = self._raiden_service.config['protocol']
    delays = udp_utils.timeout_exponential_backoff(
        protocol_config['retries_before_backoff'],
        protocol_config['retry_interval'],
        protocol_config['retry_interval'] * 10,
    )

    while True:
        # Stop as soon as a result has been stored.
        if async_result.value is not None:
            return

        self._send_immediate(receiver_address, data)
        gevent.sleep(next(delays))
def retry():
    """Resend ``data`` until ``async_result`` holds a value.

    ``self``, ``async_result``, ``receiver_address`` and ``data`` are free
    variables resolved from the enclosing scope. Nothing is sent when
    ``self._running`` is falsy at the time of the call. Timeouts come from
    ``self._config``; the maximum timeout is ten times the retry interval.
    """
    if self._running:
        config = self._config
        delays = udp_utils.timeout_exponential_backoff(
            config['retries_before_backoff'],
            config['retry_interval'],
            config['retry_interval'] * 10,
        )

        # The running flag is only checked once, before the first send.
        while async_result.value is None:
            self._send_immediate(receiver_address, data)
            pause = next(delays)
            gevent.sleep(pause)
def retry():
    """Resend ``data`` until ``async_result`` holds a value.

    ``self``, ``async_result``, ``receiver_address`` and ``data`` are free
    variables resolved from the enclosing scope. Nothing is sent when
    ``self._running`` is falsy at the time of the call. Timeouts come from
    the ``'transport'`` section of the raiden service config; the maximum
    timeout is ten times the retry interval.
    """
    if not self._running:
        return

    transport_config = self._raiden_service.config['transport']
    delays = udp_utils.timeout_exponential_backoff(
        transport_config['retries_before_backoff'],
        transport_config['retry_interval'],
        transport_config['retry_interval'] * 10,
    )

    while True:
        # Stop as soon as a result has been stored.
        if async_result.value is not None:
            break

        self._send_immediate(receiver_address, data)
        gevent.sleep(next(delays))
def retry():
    """Keep sending ``data`` while the message is still queued.

    ``self``, ``receiver_address``, ``data``, ``message``, ``message_id``,
    ``queue_identifier`` and ``reachable`` are free variables resolved from
    the enclosing scope.

    On every round the message is sent only if the receiver's known presence
    is in ``reachable``; otherwise the send is skipped and logged. The loop
    ends when the stop event is set, when the queue no longer exists, or when
    no event in the queue carries ``message_id`` any more.
    """
    config = self._config
    delays = udp_utils.timeout_exponential_backoff(
        config['retries_before_backoff'],
        config['retry_interval'],
        config['retry_interval'] * 10,
    )

    for delay in delays:
        presence = self._address_to_presence.get(receiver_address)
        if presence not in reachable:
            self.log.debug(
                'Skipping SEND to unreachable node',
                receiver=pex(receiver_address),
                status=presence,
                message=message,
                queue=queue_identifier,
            )
        else:
            self._send_raw(receiver_address, data)

        # Sleeps for `delay` like gevent.sleep would, but returns early with
        # a truthy value when the transport is being stopped.
        stopping = self._stop_event.wait(delay)
        if stopping:
            break

        # Only keep retrying while our queue still exists.
        if queue_identifier not in self._queueids_to_queues:
            self.log.debug(
                'Queue cleaned, stop retrying',
                message=message,
                queue=queue_identifier,
                queueids_to_queues=self._queueids_to_queues,
            )
            break

        # Only keep retrying while the message is still in the queue;
        # Delivered and Processed messages should eventually remove it.
        pending_events = self._queueids_to_queues[queue_identifier]
        still_pending = any(
            message_id == event.message_identifier
            for event in pending_events
        )
        if not still_pending:
            break
def single_queue_send(
        transport: 'UDPTransport',
        recipient: typing.Address,
        queue: Queue_T,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        message_retries: int,
        message_retry_timeout: int,
        message_retry_max_timeout: int,
):
    """ Drain one message queue addressed to `recipient`.

    The message at the head of the queue is retried until it is
    acknowledged, and only then is it removed from the queue.

    Notes:
    - This task has to be the sole consumer of `queue`.
    - The task may be killed at any moment, but the intended way to stop it
      is to set `event_stop`.
    - When several queues exist for the same recipient, the caller is
      responsible for not starting them together, to avoid congestion.
    - The endpoint is assumed to never be cleared once it is known. If that
      assumption changes this code must learn to handle unknown addresses.

    Raises:
        ValueError: If `queue` is not a `NotifyingQueue`.
    """

    # Cancelability depends on a NotifyingQueue: without it the greenlet
    # could not be stopped while it waits for an element to be inserted.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused across iterations, so it must be cleared with care.
    wakeup = event_first_of(
        queue,
        event_stop,
    )

    # Block until the endpoint is registered or we are asked to quit.
    healthy_or_stop = event_first_of(
        event_healthy,
        event_stop,
    )
    healthy_or_stop.wait()

    while True:
        wakeup.wait()

        if event_stop.is_set():
            return

        # Being the only consumer guarantees the queue is non-empty here, so
        # the non-blocking peek cannot raise Empty.
        (messagedata, message_id) = queue.peek(block=False)

        timeouts = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        try:
            acked = retry_with_recovery(
                transport,
                messagedata,
                message_id,
                recipient,
                event_stop,
                event_healthy,
                event_unhealthy,
                timeouts,
            )
        except RaidenShuttingDown:
            # Leave quietly so the shutdown process stays clean.
            return

        if not acked:
            continue

        queue.get()

        # Checking the queue length does not context-switch, so the length
        # cannot change under our feet; when a new item is added the event
        # will be set again.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            return
def single_queue_send(
        transport: 'UDPTransport',
        recipient: typing.Address,
        queue: Queue_T,
        queue_identifier: QueueIdentifier,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        message_retries: int,
        message_retry_timeout: int,
        message_retry_max_timeout: int,
):
    """ Drain one message queue addressed to `recipient`.

    The message at the head of the queue is retried until it is
    acknowledged, and only then is it removed from the queue. Progress is
    reported through debug log entries tagged with `queue_identifier`.

    Notes:
    - This task has to be the sole consumer of `queue`.
    - The task may be killed at any moment, but the intended way to stop it
      is to set `event_stop`.
    - When several queues exist for the same recipient, the caller is
      responsible for not starting them together, to avoid congestion.
    - The endpoint is assumed to never be cleared once it is known. If that
      assumption changes this code must learn to handle unknown addresses.

    Raises:
        ValueError: If `queue` is not a `NotifyingQueue`.
    """

    # Cancelability depends on a NotifyingQueue: without it the greenlet
    # could not be stopped while it waits for an element to be inserted.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused across iterations, so it must be cleared with care.
    wakeup = event_first_of(
        queue,
        event_stop,
    )

    # Block until the endpoint is registered or we are asked to quit.
    log.debug(
        'queue: waiting for node to become healthy',
        node=pex(transport.raiden.address),
        queue_identifier=queue_identifier,
        queue_size=len(queue),
    )

    healthy_or_stop = event_first_of(
        event_healthy,
        event_stop,
    )
    healthy_or_stop.wait()

    log.debug(
        'queue: processing queue',
        node=pex(transport.raiden.address),
        queue_identifier=queue_identifier,
        queue_size=len(queue),
    )

    while True:
        wakeup.wait()

        if event_stop.is_set():
            log.debug(
                'queue: stopping',
                node=pex(transport.raiden.address),
                queue_identifier=queue_identifier,
                queue_size=len(queue),
            )
            return

        # Being the only consumer guarantees the queue is non-empty here, so
        # the non-blocking peek cannot raise Empty.
        (messagedata, message_id) = queue.peek(block=False)

        log.debug(
            'queue: sending message',
            node=pex(transport.raiden.address),
            recipient=pex(recipient),
            msgid=message_id,
            queue_identifier=queue_identifier,
            queue_size=len(queue),
        )

        timeouts = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        acked = retry_with_recovery(
            transport,
            messagedata,
            message_id,
            recipient,
            event_stop,
            event_healthy,
            event_unhealthy,
            timeouts,
        )

        if not acked:
            continue

        queue.get()

        # Checking the queue length does not context-switch, so the length
        # cannot change under our feet; when a new item is added the event
        # will be set again.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            return
def healthcheck(
        transport: UDPTransport,
        recipient: typing.Address,
        stop_event: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        nat_keepalive_retries: int,
        nat_keepalive_timeout: int,
        nat_invitation_timeout: int,
        ping_nonce: typing.Dict[str, int],
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The task first waits until `transport.get_host_port(recipient)` stops
    raising `UnknownAddress`, then pings the recipient in a loop. After each
    round it updates the recipient's network state on the transport and
    flips `event_healthy` / `event_unhealthy` accordingly. It returns once
    `stop_event` is set.

    Args:
        transport: Transport used to build pings, resolve the endpoint and
            record the node network state.
        recipient: Address of the node being monitored.
        stop_event: Event that ends the task when set.
        event_healthy: Set while the recipient is considered reachable,
            cleared otherwise.
        event_unhealthy: Set while the recipient is considered unreachable
            (or its endpoint is unknown), cleared otherwise.
        nat_keepalive_retries: How many times a Ping is sent before the node
            is marked as unreachable.
        nat_keepalive_timeout: Wait between keepalive Pings and between the
            retries of a single Ping.
        nat_invitation_timeout: Wait between Pings once the node has been
            marked unreachable; also the upper bound of the backoff used
            while waiting for endpoint registration.
        ping_nonce: Mutable mapping holding the counter under the key
            `'nonce'`. It is incremented IN PLACE before every Ping, so the
            caller observes the updates. (It was previously annotated as
            `int`, which did not match how it is used here.)
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(transport.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(transport.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # Poll the endpoint with growing pauses until it is known or the
        # task is asked to stop.
        while not stop_event.wait(sleep):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(sleep):
        sleep = nat_keepalive_timeout

        # The nonce lives in a mapping owned by the caller; the same value is
        # used for the Ping payload and for the message id.
        ping_nonce['nonce'] += 1
        messagedata = transport.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        acknowledged = udp_utils.retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_event,
            [nat_keepalive_timeout] * nat_keepalive_retries,
        )

        if stop_event.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(transport.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            acknowledged = udp_utils.retry(
                transport,
                messagedata,
                message_id,
                recipient,
                stop_event,
                repeat(nat_invitation_timeout),
            )

        if acknowledged:
            # Only used for the log entry below.
            current_state = views.get_node_network_status(
                views.state_from_raiden(transport.raiden),
                recipient,
            )

            # Only act on a transition into the reachable state.
            if last_state != NODE_NETWORK_REACHABLE:
                log.debug(
                    'node answered',
                    node=pex(transport.raiden.address),
                    to=pex(recipient),
                    current_state=current_state,
                    new_state=NODE_NETWORK_REACHABLE,
                )

                last_state = NODE_NETWORK_REACHABLE
                transport.set_node_network_state(
                    recipient,
                    last_state,
                )
                event_unhealthy.clear()
                event_healthy.set()