def test_udp_ping_pong_unreachable_node(raiden_network, skip_if_not_udp):
    """ A Ping sent to a node whose transport was stopped is never acknowledged.

    Once the keepalive window has elapsed the sender's view of the recipient
    must be NODE_NETWORK_UNREACHABLE.
    """
    sender, receiver = raiden_network

    # Stop the receiving transport before anything is sent.
    receiver.raiden.transport.stop_and_wait()

    ping = Ping(nonce=0)
    sender.raiden.sign(ping)
    encoded_ping = ping.encode()

    message_key = ('ping', ping.nonce, receiver.raiden.address)
    pending_ack = sender.raiden.transport.maybe_sendraw_with_result(
        receiver.raiden.address,
        encoded_ping,
        message_key,
    )

    # Twice the full retry window, to wait a bit longer and avoid races.
    keepalive_window = (
        sender.config['transport']['nat_keepalive_timeout'] *
        sender.config['transport']['nat_keepalive_retries'] *
        2
    )

    msg = "The message was dropped, it can't be acknowledged"
    assert pending_ack.wait(keepalive_window) is None, msg

    observed_status = views.get_node_network_status(
        views.state_from_app(sender),
        receiver.raiden.address,
    )
    assert observed_status is state.NODE_NETWORK_UNREACHABLE
def test_udp_ping_pong(raiden_network, skip_if_not_udp):
    """ A Ping sent between two running nodes is processed in time.

    After the result is available the sender's view of the recipient must be
    NODE_NETWORK_REACHABLE.
    """
    sender, receiver = raiden_network

    ping = Ping(nonce=0)
    sender.raiden.sign(ping)
    encoded_ping = ping.encode()

    message_key = ('ping', ping.nonce, receiver.raiden.address)
    pending_ack = sender.raiden.transport.maybe_sendraw_with_result(
        receiver.raiden.address,
        encoded_ping,
        message_key,
    )

    # Wait at most 2 (the value passed to `wait`) for the result.
    assert pending_ack.wait(2), 'The message was not processed'

    observed_status = views.get_node_network_status(
        views.state_from_app(sender),
        receiver.raiden.address,
    )
    assert observed_status is state.NODE_NETWORK_REACHABLE
def healthcheck(
        transport: UDPTransport,
        recipient: typing.Address,
        stop_event: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        nat_keepalive_retries: int,
        nat_keepalive_timeout: int,
        nat_invitation_timeout: int,
        ping_nonce: dict,
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The function loops until `stop_event` is set. While looping it records
    the recipient's state through `transport.set_node_network_state` and
    toggles `event_healthy` / `event_unhealthy` accordingly.

    Args:
        transport: Used to resolve the recipient's endpoint
            (`get_host_port`), build Pings (`get_ping`) and record the node
            state (`set_node_network_state`).
        recipient: Address of the node being monitored.
        stop_event: Setting this event makes the function return.
        event_healthy: Set while the recipient is considered usable.
        event_unhealthy: Set while the recipient is considered unusable.
        nat_keepalive_retries: Number of timeouts handed to the first retry
            round before the node is flagged as unreachable.
        nat_keepalive_timeout: Timeout used for each of those retries and as
            the pause between two Ping rounds.
        nat_invitation_timeout: Timeout repeated indefinitely for the second
            retry round, once the node was flagged as unreachable.
        ping_nonce: Mutable mapping whose `'nonce'` entry is incremented in
            place for every new Ping. It is a mapping, not an `int`.
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(transport.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(transport.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # Poll for the endpoint, pausing for the next backoff value after
        # every failed lookup; leaves the loop early if `stop_event` is set.
        while not stop_event.wait(sleep):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(sleep):
        sleep = nat_keepalive_timeout

        ping_nonce['nonce'] += 1
        messagedata = transport.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        # NOTE(review): `udp_utils.retry` presumably returns a truthy value
        # once the Ping was acknowledged - confirm against udp_utils.
        acknowledged = udp_utils.retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_event,
            [nat_keepalive_timeout] * nat_keepalive_retries,
        )

        if stop_event.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(transport.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            acknowledged = udp_utils.retry(
                transport,
                messagedata,
                message_id,
                recipient,
                stop_event,
                repeat(nat_invitation_timeout),
            )

        if acknowledged:
            current_state = views.get_node_network_status(
                views.state_from_raiden(transport.raiden),
                recipient,
            )

            # Only log on a transition; the state and the events below are
            # refreshed on every acknowledged Ping.
            if last_state != NODE_NETWORK_REACHABLE:
                log.debug(
                    'node answered',
                    node=pex(transport.raiden.address),
                    to=pex(recipient),
                    current_state=current_state,
                    new_state=NODE_NETWORK_REACHABLE,
                )

            last_state = NODE_NETWORK_REACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_unhealthy.clear()
            event_healthy.set()
def get_node_network_state(self, node_address):
    """ Return the current network status of `node_address`.

    The status is read with `views.get_node_network_status` from the state
    obtained via `views.state_from_raiden(self.raiden)`.
    """
    current_chain_state = views.state_from_raiden(self.raiden)
    return views.get_node_network_status(current_chain_state, node_address)
def get_node_network_state(self, node_address: typing.Address):
    """ Return the current network status of `node_address`.

    The lookup is delegated to `views.get_node_network_status`, using the
    state derived from `self.raiden`.
    """
    raiden_chain_state = views.state_from_raiden(self.raiden)
    status = views.get_node_network_status(
        chain_state=raiden_chain_state,
        node_address=node_address,
    )
    return status
def healthcheck(
        protocol: 'UDPTransport',
        recipient: typing.Address,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        nat_keepalive_retries: int,
        nat_keepalive_timeout: int,
        nat_invitation_timeout: int,
        ping_nonce: dict,
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The function loops until `event_stop` is set or `RaidenShuttingDown` is
    raised while pinging. While looping it records the recipient's state
    through `protocol.set_node_network_state` and toggles `event_healthy` /
    `event_unhealthy` accordingly.

    Args:
        protocol: Used to resolve the recipient's endpoint
            (`get_host_port`), build Pings (`get_ping`) and record the node
            state (`set_node_network_state`).
        recipient: Address of the node being monitored.
        event_stop: Setting this event makes the function return.
        event_healthy: Set while the recipient is considered usable.
        event_unhealthy: Set while the recipient is considered unusable.
        nat_keepalive_retries: Number of timeouts handed to the first retry
            round before the node is flagged as unreachable.
        nat_keepalive_timeout: Timeout used for each of those retries and as
            the pause between two Ping rounds.
        nat_invitation_timeout: Timeout repeated indefinitely for the second
            retry round, once the node was flagged as unreachable.
        ping_nonce: Mutable mapping whose `'nonce'` entry is incremented in
            place for every new Ping. It is a mapping, not an `int`.
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(protocol.raiden.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    protocol.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        protocol.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(protocol.raiden.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # Poll for the endpoint, pausing for the next backoff value after
        # every failed lookup; leaves the loop early if `event_stop` is set.
        while not event_stop.wait(sleep):
            try:
                protocol.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not event_stop.wait(sleep):
        sleep = nat_keepalive_timeout

        ping_nonce['nonce'] += 1
        messagedata = protocol.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        # NOTE(review): `udp_utils.retry` presumably returns a truthy value
        # once the Ping was acknowledged - confirm against udp_utils.
        try:
            acknowledged = udp_utils.retry(
                protocol,
                messagedata,
                message_id,
                recipient,
                event_stop,
                [nat_keepalive_timeout] * nat_keepalive_retries,
            )
        except RaidenShuttingDown:  # For a clean shutdown process
            return

        if event_stop.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(protocol.raiden.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            protocol.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            try:
                acknowledged = udp_utils.retry(
                    protocol,
                    messagedata,
                    message_id,
                    recipient,
                    event_stop,
                    repeat(nat_invitation_timeout),
                )
            except RaidenShuttingDown:  # For a clean shutdown process
                return

        if acknowledged:
            current_state = views.get_node_network_status(
                views.state_from_raiden(protocol.raiden),
                recipient,
            )
            # Logged for every acknowledged Ping, not only on transitions.
            log.debug(
                'node answered',
                node=pex(protocol.raiden.address),
                to=pex(recipient),
                current_state=current_state,
                new_state=NODE_NETWORK_REACHABLE,
            )

            # The stored state and the events are only touched when the node
            # was not already considered reachable.
            if last_state != NODE_NETWORK_REACHABLE:
                last_state = NODE_NETWORK_REACHABLE
                protocol.set_node_network_state(
                    recipient,
                    last_state,
                )
                event_unhealthy.clear()
                event_healthy.set()
def get_node_network_state(self, node_address: Address) -> NetworkState:
    """ Look up the current network status of `node_address`.

    Delegates to `views.get_node_network_status`, passing the state obtained
    from `views.state_from_raiden(self.raiden)`.
    """
    current_chain_state = views.state_from_raiden(self.raiden)
    network_status = views.get_node_network_status(
        chain_state=current_chain_state, node_address=node_address
    )
    return network_status
def healthcheck(
    transport: "UDPTransport",
    recipient: Address,
    stop_event: Event,
    event_healthy: Event,
    event_unhealthy: Event,
    nat_keepalive_retries: int,
    nat_keepalive_timeout: int,
    nat_invitation_timeout: int,
    ping_nonce: Dict[str, Nonce],
):
    """ Ping `recipient` periodically and publish whether it answers.

    Runs until `stop_event` is set. The recipient's state is recorded with
    `transport.set_node_network_state`, and `event_healthy` /
    `event_unhealthy` are toggled to mirror it. `ping_nonce["nonce"]` is
    advanced in place for every new Ping.
    """
    # pylint: disable=too-many-branches

    def ping_with(data, identifier, timeouts):
        # One round of Ping retries towards `recipient`, paced by `timeouts`.
        return udp_utils.retry(transport, data, identifier, recipient, stop_event, timeouts)

    log.debug("starting healthcheck for", node=pex(transport.address), to=pex(recipient))

    # Nothing is known about the node yet. The events are set further down
    # so that waiting tasks are allowed to work.
    known_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(recipient, known_state)

    # Ordering rule used throughout: `clear` one event before `set`-ing the
    # other, because only `set` context-switches.

    # Block until the endpoint is registered, or until asked to stop.
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            "waiting for endpoint registration", node=pex(transport.address), to=pex(recipient)
        )

        event_healthy.clear()
        event_unhealthy.set()

        registration_delays = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries, nat_keepalive_timeout, nat_invitation_timeout
        )
        pause = next(registration_delays)

        while not stop_event.wait(pause):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                pause = next(registration_delays)
            else:
                break

    # The first Ping goes out immediately.
    pause = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(pause):
        pause = nat_keepalive_timeout

        ping_nonce["nonce"] = Nonce(ping_nonce["nonce"] + 1)
        messagedata = transport.get_ping(ping_nonce["nonce"])
        message_id = ("ping", ping_nonce["nonce"], recipient)

        # A bounded number of attempts before declaring the node unreachable.
        keepalive_timeouts = [nat_keepalive_timeout] * nat_keepalive_retries
        answered = ping_with(messagedata, message_id, keepalive_timeouts)

        if stop_event.is_set():
            return

        if not answered:
            log.debug(
                "node is unresponsive",
                node=pex(transport.address),
                to=pex(recipient),
                current_state=known_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # Unhealthy node: clearing the healthy event stops the queue tasks.
            known_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(recipient, known_state)
            event_healthy.clear()
            event_unhealthy.set()

            # Keep pinging until recovery; doubles as status check and
            # NAT punching.
            answered = ping_with(messagedata, message_id, repeat(nat_invitation_timeout))

        if not answered:
            continue

        current_state = views.get_node_network_status(
            views.state_from_raiden(transport.raiden), recipient
        )

        # Log only on a transition; state and events are refreshed every time.
        if known_state != NODE_NETWORK_REACHABLE:
            log.debug(
                "node answered",
                node=pex(transport.raiden.address),
                to=pex(recipient),
                current_state=current_state,
                new_state=NODE_NETWORK_REACHABLE,
            )

        known_state = NODE_NETWORK_REACHABLE
        transport.set_node_network_state(recipient, known_state)
        event_unhealthy.clear()
        event_healthy.set()
def get_best_routes(
    chain_state: ChainState,
    token_network_address: TokenNetworkAddress,
    one_to_n_address: Optional[OneToNAddress],
    from_address: InitiatorAddress,
    to_address: TargetAddress,
    amount: PaymentAmount,
    previous_address: Optional[Address],
    pfs_config: Optional[PFSConfig],
    privkey: PrivateKey,
) -> Tuple[Optional[str], List[RouteState], Optional[UUID]]:
    """ Find routes for a transfer of `amount` from `from_address` to `to_address`.

    Resolution order:

    1. A usable direct channel to the target is returned on its own.
    2. Otherwise each open channel to a neighbour is checked for a path to
       the target, enough capacity and a reachable partner.
    3. If no channel qualifies, an error message with per-reason counters is
       returned.
    4. If both `pfs_config` and `one_to_n_address` are given, the routes are
       requested through `get_best_routes_pfs`.
    5. Otherwise the locally computed routes are returned, shortest first,
       with a default fee estimate.

    Returns:
        A `(error_message, routes, feedback_token)` tuple. `error_message`
        is `None` for locally computed routes; `feedback_token` is only
        populated from the PFS response.
    """
    token_network = views.get_token_network_by_address(chain_state, token_network_address)
    assert token_network, "The token network must be validated and exist."

    try:
        # networkx returns a generator, consume the result since it will be
        # iterated over multiple times.
        all_neighbors = list(
            networkx.all_neighbors(token_network.network_graph.network, from_address)
        )
    except networkx.NetworkXError:
        # If `our_address` is not in the graph, no channels opened with the
        # address.
        log.debug(
            "Node does not have a channel in the requested token network.",
            source=to_checksum_address(from_address),
            target=to_checksum_address(to_address),
            amount=amount,
        )
        return ("Node does not have a channel in the requested token network.", list(), None)

    # Per-reason counters, reported if no channel turns out to be usable.
    error_closed = 0
    error_no_route = 0
    error_no_capacity = 0
    error_not_online = 0
    error_direct = None
    # Used as a heap (heappush / heappop) of route candidates.
    shortest_routes: List[Neighbour] = list()

    # Always use a direct channel if available:
    # - There are no race conditions and the capacity is guaranteed to be
    #   available.
    # - There will be no mediation fees
    # - The transfer will be faster
    if to_address in all_neighbors:
        for channel_id in token_network.partneraddresses_to_channelidentifiers[
            Address(to_address)
        ]:
            channel_state = token_network.channelidentifiers_to_channels[channel_id]

            # direct channels don't have fees
            payment_with_fee_amount = PaymentWithFeeAmount(amount)
            is_usable = channel.is_channel_usable_for_new_transfer(
                channel_state, payment_with_fee_amount, None
            )

            if is_usable is channel.ChannelUsability.USABLE:
                direct_route = RouteState(
                    route=[Address(from_address), Address(to_address)],
                    forward_channel_id=channel_state.canonical_identifier.channel_identifier,
                    estimated_fee=FeeAmount(0),
                )
                return (None, [direct_route], None)

            # Remember why the (last checked) direct channel was rejected.
            error_direct = is_usable

    # Highest opening block among the open channels; handed to the PFS
    # request as `pfs_wait_for_block`.
    latest_channel_opened_at = BlockNumber(0)
    for partner_address in all_neighbors:
        for channel_id in token_network.partneraddresses_to_channelidentifiers[partner_address]:
            channel_state = token_network.channelidentifiers_to_channels[channel_id]

            if channel.get_status(channel_state) != ChannelState.STATE_OPENED:
                error_closed += 1
                continue

            latest_channel_opened_at = max(
                latest_channel_opened_at, channel_state.open_transaction.finished_block_number
            )

            try:
                route = networkx.shortest_path(  # pylint: disable=E1121
                    token_network.network_graph.network, partner_address, to_address
                )
            except (networkx.NetworkXNoPath, networkx.NodeNotFound):
                error_no_route += 1
            else:
                distributable = channel.get_distributable(
                    channel_state.our_state, channel_state.partner_state
                )

                network_status = views.get_node_network_status(
                    chain_state, channel_state.partner_state.address
                )

                if distributable < amount:
                    error_no_capacity += 1
                elif network_status != NetworkState.REACHABLE:
                    error_not_online += 1
                else:
                    nonrefundable = amount > channel.get_distributable(
                        channel_state.partner_state, channel_state.our_state
                    )

                    # The complete route includes the initiator, add it to the beginning
                    complete_route = [Address(from_address)] + route
                    neighbour = Neighbour(
                        length=len(route),
                        nonrefundable=nonrefundable,
                        partner_address=partner_address,
                        channelid=channel_state.identifier,
                        route=complete_route,
                    )
                    heappush(shortest_routes, neighbour)

    if not shortest_routes:
        qty_channels = sum(
            len(token_network.partneraddresses_to_channelidentifiers[partner_address])
            for partner_address in all_neighbors
        )
        error_msg = (
            f"None of the existing channels could be used to complete the "
            f"transfer. From the {qty_channels} existing channels. "
            f"{error_closed} are closed. {error_not_online} are not online. "
            f"{error_no_route} don't have a route to the target in the given "
            f"token network. {error_no_capacity} don't have enough capacity for "
            f"the requested transfer."
        )
        if error_direct is not None:
            # Leading space: the sentence above ends without a trailing one.
            error_msg += f" direct channel {error_direct}."

        log.warning(
            "None of the existing channels could be used to complete the transfer",
            from_address=to_checksum_address(from_address),
            to_address=to_checksum_address(to_address),
            error_closed=error_closed,
            error_no_route=error_no_route,
            error_no_capacity=error_no_capacity,
            error_direct=error_direct,
            error_not_online=error_not_online,
        )
        return (error_msg, list(), None)

    if pfs_config is not None and one_to_n_address is not None:
        pfs_error_msg, pfs_routes, pfs_feedback_token = get_best_routes_pfs(
            chain_state=chain_state,
            token_network_address=token_network_address,
            one_to_n_address=one_to_n_address,
            from_address=from_address,
            to_address=to_address,
            amount=amount,
            previous_address=previous_address,
            pfs_config=pfs_config,
            privkey=privkey,
            pfs_wait_for_block=latest_channel_opened_at,
        )

        if not pfs_error_msg:
            # As of version 0.5 it is possible for the PFS to return an empty
            # list of routes without an error message.
            if not pfs_routes:
                return ("PFS could not find any routes", list(), None)

            log.info(
                "Received route(s) from PFS", routes=pfs_routes, feedback_token=pfs_feedback_token
            )
            return (pfs_error_msg, pfs_routes, pfs_feedback_token)

        log.warning(
            "Request to Pathfinding Service was not successful. "
            "No routes to the target are found.",
            pfs_message=pfs_error_msg,
        )
        return (pfs_error_msg, list(), None)

    else:
        # https://github.com/raiden-network/raiden/issues/4751
        # Internal routing doesn't know how much fees the initiator will be charged,
        # so it should set a percentage on top of the original amount
        # for the whole route. The value is the same for every route, so it
        # is computed once.
        default_estimated_fee = FeeAmount(round(INTERNAL_ROUTING_DEFAULT_FEE_PERC * amount))

        available_routes = list()

        # Drain the heap so the routes come out in heap order.
        while shortest_routes:
            neighbour = heappop(shortest_routes)

            if neighbour.length == 1:  # Target is our direct neighbour, pay no fees.
                estimated_fee = FeeAmount(0)
            else:
                estimated_fee = default_estimated_fee

            available_routes.append(
                RouteState(
                    route=neighbour.route,
                    forward_channel_id=neighbour.channelid,
                    estimated_fee=estimated_fee,
                )
            )

        return (None, available_routes, None)