Esempio n. 1
0
def test_udp_ping_pong_unreachable_node(raiden_network, skip_if_not_udp):
    """A Ping sent to a node whose transport is stopped is never acknowledged.

    Once the keepalive window has elapsed without an answer, the sender must
    report the recipient as unreachable.
    """
    sender, receiver = raiden_network

    # Stop the receiving transport so nothing can answer the Ping.
    receiver.raiden.transport.stop_and_wait()

    ping = Ping(nonce=0)
    sender.raiden.sign(ping)

    receiver_address = receiver.raiden.address
    async_result = sender.raiden.transport.maybe_sendraw_with_result(
        receiver_address,
        ping.encode(),
        ('ping', ping.nonce, receiver_address),
    )

    transport_config = sender.config['transport']
    # Twice the complete retry window, to stay clear of timing races.
    give_up_after = (
        transport_config['nat_keepalive_timeout'] *
        transport_config['nat_keepalive_retries'] *
        2
    )
    assert async_result.wait(give_up_after) is None, (
        "The message was dropped, it can't be acknowledged"
    )

    observed_status = views.get_node_network_status(
        views.state_from_app(sender),
        receiver_address,
    )
    assert observed_status is state.NODE_NETWORK_UNREACHABLE
Esempio n. 2
0
def test_udp_ping_pong(raiden_network, skip_if_not_udp):
    """A signed Ping sent to a running node is processed within two seconds.

    Afterwards the sender must report the recipient as reachable.
    """
    sender, receiver = raiden_network

    ping = Ping(nonce=0)
    sender.raiden.sign(ping)

    receiver_address = receiver.raiden.address
    async_result = sender.raiden.transport.maybe_sendraw_with_result(
        receiver_address,
        ping.encode(),
        ('ping', ping.nonce, receiver_address),
    )
    assert async_result.wait(2), 'The message was not processed'

    observed_status = views.get_node_network_status(
        views.state_from_app(sender),
        receiver_address,
    )
    assert observed_status is state.NODE_NETWORK_REACHABLE
Esempio n. 3
0
def healthcheck(
        transport: UDPTransport,
        recipient: typing.Address,
        stop_event: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        nat_keepalive_retries: int,
        nat_keepalive_timeout: int,
        nat_invitation_timeout: int,
        ping_nonce: dict,
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The function runs until `stop_event` is set and works in two phases:

    1. If `transport.get_host_port(recipient)` raises `UnknownAddress`, the
       recipient is flagged as unhealthy and the lookup is repeated, sleeping
       between attempts for the durations produced by
       `udp_utils.timeout_exponential_backoff`.
    2. A Ping is sent every `nat_keepalive_timeout`. Each Ping is retried
       `nat_keepalive_retries` times; when it stays unacknowledged the
       recipient is recorded as `NODE_NETWORK_UNREACHABLE` and the same Ping
       is retried every `nat_invitation_timeout` with no retry limit. The
       first acknowledgement after a non-reachable state records
       `NODE_NETWORK_REACHABLE`.

    Args:
        transport: Used to resolve the recipient endpoint, build the Ping and
            record the network state of `recipient`.
        recipient: Address of the node being monitored.
        stop_event: Terminates the healthcheck once set.
        event_healthy: Set while the recipient is considered healthy.
        event_unhealthy: Set while the recipient is considered unhealthy.
        nat_keepalive_retries: Number of timeouts waited for a Ping before
            the recipient is declared unreachable.
        nat_keepalive_timeout: Timeout of each of those retries, also used as
            the pause between two Pings.
        nat_invitation_timeout: Retry timeout used while the recipient is
            unreachable.
        ping_nonce: Mutable mapping whose `'nonce'` entry is incremented in
            place before every Ping (presumably shared with the caller so
            nonces are not reused -- verify against the call site).
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(transport.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(transport.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # `wait` returns early (truthy) when the stop event is set, which
        # also ends this polling loop.
        while not stop_event.wait(sleep):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(sleep):
        sleep = nat_keepalive_timeout

        ping_nonce['nonce'] += 1
        messagedata = transport.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        acknowledged = udp_utils.retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_event,
            [nat_keepalive_timeout] * nat_keepalive_retries,
        )

        if stop_event.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(transport.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            acknowledged = udp_utils.retry(
                transport,
                messagedata,
                message_id,
                recipient,
                stop_event,
                repeat(nat_invitation_timeout),
            )

        if acknowledged and last_state != NODE_NETWORK_REACHABLE:
            # The recorded status is only needed by the transition log below,
            # so it is looked up here rather than on every acknowledged Ping.
            current_state = views.get_node_network_status(
                views.state_from_raiden(transport.raiden),
                recipient,
            )
            log.debug(
                'node answered',
                node=pex(transport.raiden.address),
                to=pex(recipient),
                current_state=current_state,
                new_state=NODE_NETWORK_REACHABLE,
            )

            last_state = NODE_NETWORK_REACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_unhealthy.clear()
            event_healthy.set()
Esempio n. 4
0
 def get_node_network_state(self, node_address):
     """ Return the network status recorded for `node_address` in the chain
     state obtained from `self.raiden`.
     """
     chain_state = views.state_from_raiden(self.raiden)
     return views.get_node_network_status(chain_state, node_address)
Esempio n. 5
0
 def get_node_network_state(self, node_address: typing.Address):
     """ Return the network status recorded for `node_address` in the chain
     state obtained from `self.raiden`.
     """
     current_chain_state = views.state_from_raiden(self.raiden)
     return views.get_node_network_status(
         chain_state=current_chain_state,
         node_address=node_address,
     )
Esempio n. 6
0
def healthcheck(
    protocol: 'UDPTransport',
    recipient: typing.Address,
    event_stop: Event,
    event_healthy: Event,
    event_unhealthy: Event,
    nat_keepalive_retries: int,
    nat_keepalive_timeout: int,
    nat_invitation_timeout: int,
    ping_nonce: dict,
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The function runs until `event_stop` is set, or until sending a Ping
    raises `RaidenShuttingDown`, and works in two phases:

    1. If `protocol.get_host_port(recipient)` raises `UnknownAddress`, the
       recipient is flagged as unhealthy and the lookup is repeated, sleeping
       between attempts for the durations produced by
       `udp_utils.timeout_exponential_backoff`.
    2. A Ping is sent every `nat_keepalive_timeout`. Each Ping is retried
       `nat_keepalive_retries` times; when it stays unacknowledged the
       recipient is recorded as `NODE_NETWORK_UNREACHABLE` and the same Ping
       is retried every `nat_invitation_timeout` with no retry limit. The
       first acknowledgement after a non-reachable state records
       `NODE_NETWORK_REACHABLE`.

    Args:
        protocol: Used to resolve the recipient endpoint, build the Ping and
            record the network state of `recipient`.
        recipient: Address of the node being monitored.
        event_stop: Terminates the healthcheck once set.
        event_healthy: Set while the recipient is considered healthy.
        event_unhealthy: Set while the recipient is considered unhealthy.
        nat_keepalive_retries: Number of timeouts waited for a Ping before
            the recipient is declared unreachable.
        nat_keepalive_timeout: Timeout of each of those retries, also used as
            the pause between two Pings.
        nat_invitation_timeout: Retry timeout used while the recipient is
            unreachable.
        ping_nonce: Mutable mapping whose `'nonce'` entry is incremented in
            place before every Ping (presumably shared with the caller so
            nonces are not reused -- verify against the call site).
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(protocol.raiden.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    protocol.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        protocol.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(protocol.raiden.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # `wait` returns early (truthy) when the stop event is set, which
        # also ends this polling loop.
        while not event_stop.wait(sleep):
            try:
                protocol.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not event_stop.wait(sleep):
        sleep = nat_keepalive_timeout

        ping_nonce['nonce'] += 1
        messagedata = protocol.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        try:
            acknowledged = udp_utils.retry(
                protocol,
                messagedata,
                message_id,
                recipient,
                event_stop,
                [nat_keepalive_timeout] * nat_keepalive_retries,
            )
        except RaidenShuttingDown:  # For a clean shutdown process
            return

        if event_stop.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(protocol.raiden.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            protocol.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            try:
                acknowledged = udp_utils.retry(
                    protocol,
                    messagedata,
                    message_id,
                    recipient,
                    event_stop,
                    repeat(nat_invitation_timeout),
                )
            except RaidenShuttingDown:  # For a clean shutdown process
                return

        if acknowledged:
            # Every acknowledged Ping is logged together with the status
            # currently recorded in the chain state.
            current_state = views.get_node_network_status(
                views.state_from_raiden(protocol.raiden),
                recipient,
            )
            log.debug(
                'node answered',
                node=pex(protocol.raiden.address),
                to=pex(recipient),
                current_state=current_state,
                new_state=NODE_NETWORK_REACHABLE,
            )

            # The state and the events are only touched on a transition.
            if last_state != NODE_NETWORK_REACHABLE:
                last_state = NODE_NETWORK_REACHABLE
                protocol.set_node_network_state(
                    recipient,
                    last_state,
                )
                event_unhealthy.clear()
                event_healthy.set()
Esempio n. 7
0
 def get_node_network_state(self, node_address: Address) -> NetworkState:
     """ Return the network status recorded for `node_address` in the chain
     state obtained from `self.raiden`.
     """
     current_chain_state = views.state_from_raiden(self.raiden)
     return views.get_node_network_status(
         chain_state=current_chain_state,
         node_address=node_address,
     )
Esempio n. 8
0
def healthcheck(
    transport: "UDPTransport",
    recipient: Address,
    stop_event: Event,
    event_healthy: Event,
    event_unhealthy: Event,
    nat_keepalive_retries: int,
    nat_keepalive_timeout: int,
    nat_invitation_timeout: int,
    ping_nonce: Dict[str, Nonce],
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The function runs until `stop_event` is set and works in two phases:

    1. If `transport.get_host_port(recipient)` raises `UnknownAddress`, the
       recipient is flagged as unhealthy and the lookup is repeated, sleeping
       between attempts for the durations produced by
       `udp_utils.timeout_exponential_backoff`.
    2. A Ping is sent every `nat_keepalive_timeout`. Each Ping is retried
       `nat_keepalive_retries` times; when it stays unacknowledged the
       recipient is recorded as `NODE_NETWORK_UNREACHABLE` and the same Ping
       is retried every `nat_invitation_timeout` with no retry limit. The
       first acknowledgement after a non-reachable state records
       `NODE_NETWORK_REACHABLE`.

    Args:
        transport: Used to resolve the recipient endpoint, build the Ping and
            record the network state of `recipient`.
        recipient: Address of the node being monitored.
        stop_event: Terminates the healthcheck once set.
        event_healthy: Set while the recipient is considered healthy.
        event_unhealthy: Set while the recipient is considered unhealthy.
        nat_keepalive_retries: Number of timeouts waited for a Ping before
            the recipient is declared unreachable.
        nat_keepalive_timeout: Timeout of each of those retries, also used as
            the pause between two Pings.
        nat_invitation_timeout: Retry timeout used while the recipient is
            unreachable.
        ping_nonce: Mapping whose `"nonce"` entry is replaced with its
            successor before every Ping (presumably shared with the caller
            so nonces are not reused -- verify against the call site).
    """
    # pylint: disable=too-many-branches

    log.debug("starting healthcheck for",
              node=pex(transport.address),
              to=pex(recipient))

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(recipient, last_state)

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug("waiting for endpoint registration",
                  node=pex(transport.address),
                  to=pex(recipient))

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries, nat_keepalive_timeout,
            nat_invitation_timeout)
        sleep = next(backoff)

        # `wait` returns early (truthy) when the stop event is set, which
        # also ends this polling loop.
        while not stop_event.wait(sleep):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(sleep):
        sleep = nat_keepalive_timeout

        ping_nonce["nonce"] = Nonce(ping_nonce["nonce"] + 1)
        messagedata = transport.get_ping(ping_nonce["nonce"])
        message_id = ("ping", ping_nonce["nonce"], recipient)

        # Send Ping a few times before setting the node as unreachable
        acknowledged = udp_utils.retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_event,
            [nat_keepalive_timeout] * nat_keepalive_retries,
        )

        if stop_event.is_set():
            return

        if not acknowledged:
            log.debug(
                "node is unresponsive",
                node=pex(transport.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(recipient, last_state)
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            acknowledged = udp_utils.retry(
                transport,
                messagedata,
                message_id,
                recipient,
                stop_event,
                repeat(nat_invitation_timeout),
            )

        if acknowledged and last_state != NODE_NETWORK_REACHABLE:
            # The recorded status is only needed by the transition log below,
            # so it is looked up here rather than on every acknowledged Ping.
            current_state = views.get_node_network_status(
                views.state_from_raiden(transport.raiden), recipient)
            log.debug(
                "node answered",
                node=pex(transport.raiden.address),
                to=pex(recipient),
                current_state=current_state,
                new_state=NODE_NETWORK_REACHABLE,
            )

            last_state = NODE_NETWORK_REACHABLE
            transport.set_node_network_state(recipient, last_state)
            event_unhealthy.clear()
            event_healthy.set()
Esempio n. 9
0
def get_best_routes(
    chain_state: ChainState,
    token_network_address: TokenNetworkAddress,
    one_to_n_address: Optional[OneToNAddress],
    from_address: InitiatorAddress,
    to_address: TargetAddress,
    amount: PaymentAmount,
    previous_address: Optional[Address],
    pfs_config: Optional[PFSConfig],
    privkey: PrivateKey,
) -> Tuple[Optional[str], List[RouteState], Optional[UUID]]:
    """Select the routes to try for transferring `amount` to `to_address`.

    The result is a tuple `(error_message, routes, feedback_token)` and is
    determined in this order:

    1. `from_address` is not part of the token network graph: an error
       message and no routes.
    2. A usable direct channel to the target exists: that single route with
       an estimated fee of zero.
    3. No open channel has a path to the target, enough distributable
       balance and a reachable partner: an error message summarising why
       each channel was rejected, and no routes.
    4. Both `pfs_config` and `one_to_n_address` are given: whatever
       `get_best_routes_pfs` returns, with an error reported when it fails
       or yields no routes.
    5. Otherwise: the locally collected candidates, popped from the heap
       one by one, each with a locally estimated fee.
    """
    token_network = views.get_token_network_by_address(chain_state,
                                                       token_network_address)
    assert token_network, "The token network must be validated and exist."

    graph = token_network.network_graph.network

    try:
        # The neighbors are walked several times below, so the generator
        # handed out by networkx is materialised once.
        neighbor_addresses = list(networkx.all_neighbors(graph, from_address))
    except networkx.NetworkXError:
        # `from_address` is not a node of the graph, i.e. it has no channel
        # in this token network.
        no_channel_msg = "Node does not have a channel in the requested token network."
        log.debug(
            no_channel_msg,
            source=to_checksum_address(from_address),
            target=to_checksum_address(to_address),
            amount=amount,
        )
        return (no_channel_msg, [], None)

    channel_ids_by_partner = token_network.partneraddresses_to_channelidentifiers
    channels_by_id = token_network.channelidentifiers_to_channels

    # A direct channel is preferred over any mediated route:
    # - There are no race conditions and the capacity is guaranteed to be
    #   available.
    # - There will be no mediation fees
    # - The transfer will be faster
    direct_failure = None
    if to_address in neighbor_addresses:
        for channel_id in channel_ids_by_partner[Address(to_address)]:
            channel_state = channels_by_id[channel_id]

            # No fee is added on top of the amount for a direct channel.
            usability = channel.is_channel_usable_for_new_transfer(
                channel_state, PaymentWithFeeAmount(amount), None)

            if usability is channel.ChannelUsability.USABLE:
                only_route = RouteState(
                    route=[Address(from_address), Address(to_address)],
                    forward_channel_id=channel_state.canonical_identifier.channel_identifier,
                    estimated_fee=FeeAmount(0),
                )
                return (None, [only_route], None)

            # Remembered for the error report; the last unusable direct
            # channel wins.
            direct_failure = usability

    closed_count = 0
    no_route_count = 0
    no_capacity_count = 0
    offline_count = 0
    candidates: List[Neighbour] = []
    latest_channel_opened_at = BlockNumber(0)

    for partner_address in neighbor_addresses:
        for channel_id in channel_ids_by_partner[partner_address]:
            channel_state = channels_by_id[channel_id]

            if channel.get_status(channel_state) != ChannelState.STATE_OPENED:
                closed_count += 1
                continue

            # Tracks the most recent opening block among the open channels;
            # it is forwarded to the PFS request further down.
            latest_channel_opened_at = max(
                latest_channel_opened_at,
                channel_state.open_transaction.finished_block_number)

            try:
                route = networkx.shortest_path(  # pylint: disable=E1121
                    graph, partner_address, to_address)
            except (networkx.NetworkXNoPath, networkx.NodeNotFound):
                no_route_count += 1
                continue

            our_end = channel_state.our_state
            partner_end = channel_state.partner_state
            distributable = channel.get_distributable(our_end, partner_end)
            network_status = views.get_node_network_status(
                chain_state, channel_state.partner_state.address)

            if distributable < amount:
                no_capacity_count += 1
                continue

            if network_status != NetworkState.REACHABLE:
                offline_count += 1
                continue

            nonrefundable = amount > channel.get_distributable(
                partner_end, our_end)

            # `route` starts at the partner, the initiator is put in front
            # of it to obtain the complete path.
            full_path = [Address(from_address)] + route
            heappush(
                candidates,
                Neighbour(
                    length=len(route),
                    nonrefundable=nonrefundable,
                    partner_address=partner_address,
                    channelid=channel_state.identifier,
                    route=full_path,
                ),
            )

    if not candidates:
        qty_channels = sum(
            len(channel_ids_by_partner[partner])
            for partner in neighbor_addresses)
        error_msg = (
            f"None of the existing channels could be used to complete the "
            f"transfer. From the {qty_channels} existing channels. "
            f"{closed_count} are closed. {offline_count} are not online. "
            f"{no_route_count} don't have a route to the target in the given "
            f"token network. {no_capacity_count} don't have enough capacity for "
            f"the requested transfer.")
        if direct_failure is not None:
            error_msg += f"direct channel {direct_failure}."

        log.warning(
            "None of the existing channels could be used to complete the transfer",
            from_address=to_checksum_address(from_address),
            to_address=to_checksum_address(to_address),
            error_closed=closed_count,
            error_no_route=no_route_count,
            error_no_capacity=no_capacity_count,
            error_direct=direct_failure,
            error_not_online=offline_count,
        )
        return (error_msg, [], None)

    if pfs_config is None or one_to_n_address is None:
        # Internal routing: hand out the collected candidates in heap order.
        available_routes = []

        while candidates:
            neighbour = heappop(candidates)

            # https://github.com/raiden-network/raiden/issues/4751
            # Internal routing doesn't know how much fees the initiator will be charged,
            # so it should set a percentage on top of the original amount
            # for the whole route.
            if neighbour.length == 1:  # Target is our direct neighbour, pay no fees.
                estimated_fee = FeeAmount(0)
            else:
                estimated_fee = FeeAmount(
                    round(INTERNAL_ROUTING_DEFAULT_FEE_PERC * amount))

            available_routes.append(
                RouteState(
                    route=neighbour.route,
                    forward_channel_id=neighbour.channelid,
                    estimated_fee=estimated_fee,
                ))

        return (None, available_routes, None)

    pfs_error_msg, pfs_routes, pfs_feedback_token = get_best_routes_pfs(
        chain_state=chain_state,
        token_network_address=token_network_address,
        one_to_n_address=one_to_n_address,
        from_address=from_address,
        to_address=to_address,
        amount=amount,
        previous_address=previous_address,
        pfs_config=pfs_config,
        privkey=privkey,
        pfs_wait_for_block=latest_channel_opened_at,
    )

    if pfs_error_msg:
        log.warning(
            "Request to Pathfinding Service was not successful. "
            "No routes to the target are found.",
            pfs_message=pfs_error_msg,
        )
        return (pfs_error_msg, [], None)

    # As of version 0.5 it is possible for the PFS to return an empty
    # list of routes without an error message.
    if not pfs_routes:
        return ("PFS could not find any routes", [], None)

    log.info("Received route(s) from PFS",
             routes=pfs_routes,
             feedback_token=pfs_feedback_token)
    return (pfs_error_msg, pfs_routes, pfs_feedback_token)