def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet,
                                              sdk_wallet_client,
                                              sdk_pool_handle):
    """
    One of the non-primary replicas has an incorrect clock, so it thinks the
    PRE-PREPARE has an incorrect time.
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)
    # send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    # The replica having the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]

    make_clock_faulty(confused_npr.node)

    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_npr.node)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)

    assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False then returned True
    assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs
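
# For context: a minimal sketch of what a make_clock_faulty-style helper could
# do, assuming it simply rebinds the node's utc_epoch method so the node
# reports a skewed time (the clock repair in test_nodes_with_bad_clock below
# restores utc_epoch the same way). This is an illustration only, not plenum's
# actual helper, which, as the calls later in this file show, also takes
# clock_slow_by_sec and ppr_always_wrong keywords; get_utc_epoch is assumed to
# be the usual plenum.common.util helper.
import types

from plenum.common.util import get_utc_epoch


def make_clock_slow_sketch(node, clock_slow_by_sec=10):
    def utc_epoch(self) -> int:
        # Report a time lagging the real clock by clock_slow_by_sec seconds
        return get_utc_epoch() - clock_slow_by_sec

    node.utc_epoch = types.MethodType(utc_epoch, node)
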
def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    nodeSetWithNodeAddedAfterSomeTxns, client1,
                                    wallet1, client1Connected):
    """
    A new node joins the pool and is able to function properly without
    raising any timestamp suspicions.
    """
    _, new_node, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    old_susp_count = get_timestamp_suspicion_count(new_node)
    # Don't wait for node to catchup, start sending requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {
        node.name: get_timestamp_suspicion_count(node)
        for node in txnPoolNodeSet
    }
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)
Example 4
def test_pp_obsolescence_check_fail_for_delayed(tdir, tconf, looper,
                                                txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):

    delay = PATCHED_ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 1
    lagging_node = txnPoolNodeSet[-1]

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(),
                     cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        looper.run(asyncio.sleep(delay))

    # Now the delayed 3PC messages reach the lagging node, so the delayed
    # transactions can be processed (the PRE-PREPARE is discarded but
    # re-requested after that); ensure that all nodes have the same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    pp_count = get_count(lagging_node.master_replica,
                         lagging_node.master_replica.processPrePrepare)

    assert pp_count > 0
    assert get_timestamp_suspicion_count(lagging_node) == pp_count
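
# The obsolescence check exercised above boils down to comparing a
# PRE-PREPARE's timestamp with the replica's own clock. A purely illustrative
# predicate for that idea (not plenum's is_pre_prepare_time_acceptable, which,
# as the first test in this file hints, can also come to accept a timestamp it
# initially rejected, e.g. once enough peers have agreed on it):
def pp_time_acceptable_sketch(pp_time: int, local_now: int,
                              acceptable_deviation_secs: int) -> bool:
    # Accept only if the PRE-PREPARE time is within the configured deviation
    # of this replica's clock
    return abs(local_now - pp_time) <= acceptable_deviation_secs
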
Example 5
def test_first_audit_catchup_during_ordering(tdir, tconf, looper,
                                             txnPoolNodeSet, sdk_pool_handle,
                                             sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    # Prevent lagging node from catching up domain ledger (and finishing catchup)
    with delay_rules(other_stashers, delay_domain_ledger_catchup()):
        # Start catchup on lagging node
        lagging_node.start_catchup()
        assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

        # Ensure that audit ledger is caught up by lagging node
        looper.run(eventually(check_lagging_node_is_not_syncing_audit))
        assert lagging_node_state() != NodeLeecherService.State.Idle

        # Order a request on all nodes except the lagging one, where it goes
        # to the stashed state
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)

    # Now catchup should end and lagging node starts processing stashed PPs
    # and resumes ordering

    # ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # ensure that no suspicions about obsolete PP have been raised
    assert get_timestamp_suspicion_count(lagging_node) == 0
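
# The tests in this file lean on looper.run(eventually(...)) to retry an
# assertion until it passes or a timeout expires. A minimal standalone helper
# with the same shape, purely as an illustration (not plenum's eventually()):
import time


def retry_until_passes(check, retry_wait=1.0, timeout=15.0):
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check()
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_wait)
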
def test_nodes_with_bad_clock(tconf, looper, txnPoolNodeSet,
                              sdk_wallet_client, sdk_pool_handle):
    """
    All nodes have bad clocks but they eventually get repaired; an example is
    nodes being cut off from the NTP server for some time, or NTP sync being
    disabled and then re-enabled without a node restart.
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)
    ledger_sizes = {node.name: node.domainLedger.size for node in
                    txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(node) for node in
                   txnPoolNodeSet}
    for node in txnPoolNodeSet:
        make_clock_faulty(
            node,
            clock_slow_by_sec=node.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS +
                              randint(
                                  5,
                                  15),
            ppr_always_wrong=False)

    for _ in range(5):
        sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
        looper.runFor(.2)

    # Let some time pass
    looper.runFor(3)

    def chk():
        for node in txnPoolNodeSet:
            # Each node raises suspicion
            assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
            # Ledger does not change
            assert node.domainLedger.size == ledger_sizes[node.name]

    looper.run(eventually(chk, retryWait=1))

    # Fix clocks
    for node in txnPoolNodeSet:
        def utc_epoch(self) -> int:
            return get_utc_epoch()

        node.utc_epoch = types.MethodType(utc_epoch, node)

    # Let some more time pass
    looper.runFor(3)

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)
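
# A recurring pattern above: snapshot a per-node metric (suspicion count,
# ledger size) into a dict keyed by node name, act, then compare. A tiny
# generic helper for that pattern, shown only as an illustration (it is not
# part of the plenum test utilities):
def snapshot(nodes, metric):
    return {node.name: metric(node) for node in nodes}

# Usage sketch, mirroring the dicts built by hand above:
#   susp_counts = snapshot(txnPoolNodeSet, get_timestamp_suspicion_count)
#   ledger_sizes = snapshot(txnPoolNodeSet, lambda n: n.domainLedger.size)
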
Example 8
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primaries has a bad clock; it raises suspicions but orders
    requests after getting PREPAREs. Then a view change happens and this
    non-primary with the bad clock becomes the new primary, but it is not able
    to get any of its PRE-PREPAREs ordered. Eventually another view change
    happens, a new primary is elected and the pool is functional again.
    :return:
    """
    # The node having the bad clock, this node will be primary after view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)

    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)

    ledger_sizes = {
        node.name: node.domainLedger.size
        for node in txnPoolNodeSet
    }
    susp_counts = {
        node.name: get_timestamp_suspicion_count(node)
        for node in txnPoolNodeSet
    }
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After the view change faulty_node becomes primary, but after it sends
    # its first PRE-PREPARE another view change happens
    assert txnPoolNodeSet[2].master_replica.isPrimary

    def chk():
        for node in txnPoolNodeSet:
            assert node.viewNo == 2

        for node in [n for n in txnPoolNodeSet if n != faulty_node]:
            # Each non faulty node raises suspicion
            assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
            # No txn was written in view 1 (the faulty primary's view),
            # only in view 2
            assert any(txn[1]['txn']['data']['viewNo'] == 2
                       for txn in node.auditLedger.getAllTxn())
            assert not any(txn[1]['txn']['data']['viewNo'] == 1
                           for txn in node.auditLedger.getAllTxn())

        assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

    looper.run(eventually(chk, retryWait=1))

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)
Example 10
def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    sdk_node_created_after_some_txns,
                                    sdk_wallet_client, sdk_pool_handle):
    """
    A new node joins the pool and is able to function properly without
    raising any timestamp suspicions.
    """
    _, new_node, _, _ = sdk_node_created_after_some_txns
    old_susp_count = get_timestamp_suspicion_count(new_node)
    # Don't wait for node to catchup, start sending requests
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {
        node.name: get_timestamp_suspicion_count(node)
        for node in txnPoolNodeSet
    }
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)
Example 13
def test_stashed_pp_pass_obsolescence_check(tdir, tconf, looper,
                                            txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    # TODO INDY-2047: fills domain ledger with some requests
    # as a workaround for the issue
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    # Prevent lagging node from catching up audit ledger (and finishing catchup)
    with delay_rules(lagging_node.nodeIbStasher, delay_audit_ledger_catchup()):
        # Start catchup on lagging node
        lagging_node.ledgerManager.start_catchup()
        assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

        # Order a request on all nodes except the lagging one, where it goes
        # to the stashed state
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 1)

        # lagging node is still syncing Audit ledger
        assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

        # delay catchup end to exceed PP ACCEPTABLE_DEVIATION_PREPREPARE_SECS
        looper.runFor(PATCHED_ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 1)

    # Now catchup should end and lagging node starts processing stashed PPs
    # and resumes ordering

    # ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # ensure that no suspicions about obsolete PP have been raised
    assert get_timestamp_suspicion_count(lagging_node) == 0
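
# The assertion above pins down a property: PRE-PREPAREs stashed while catchup
# is in progress must not be flagged as obsolete once they are finally
# processed, even though more than ACCEPTABLE_DEVIATION_PREPREPARE_SECS have
# passed. One way to get that property (shown purely as an illustration of the
# behaviour, not as plenum's implementation) is to evaluate the timestamp
# check against the moment the message was first received rather than the
# moment it is unstashed:
import time


def stash_pp_sketch(stash, pp):
    # Remember when the PRE-PREPARE actually arrived
    stash.append((pp, time.time()))


def process_stashed_sketch(stash, acceptable_deviation_secs):
    accepted = []
    for pp, received_at in stash:
        # Compare against the original receive time, not the current time
        if abs(received_at - pp.ppTime) <= acceptable_deviation_secs:
            accepted.append(pp)
    return accepted
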
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primaries has a bad clock; it raises suspicions but orders
    requests after getting PREPAREs. Then a view change happens and this
    non-primary with the bad clock becomes the new primary, but it is not able
    to get any of its PRE-PREPAREs ordered. Eventually another view change
    happens, a new primary is elected and the pool is functional again.
    :return:
    """
    # The node having the bad clock, this node will be primary after view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)

    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)

    ledger_sizes = {
        node.name: node.domainLedger.size for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(
        node) for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is primary
    assert faulty_node.master_replica.isPrimary

    old_view_no = txnPoolNodeSet[0].viewNo

    # Delay instance change so view change doesn't happen in the middle of this test
    stashers = (n.nodeIbStasher for n in txnPoolNodeSet)
    with delay_rules(stashers, icDelay()):
        # Requests are sent
        for _ in range(5):
            sdk_send_random_requests(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_client,
                                     count=2)
            looper.runFor(2)

        def chk():
            for node in txnPoolNodeSet:
                assert node.viewNo == old_view_no

            for node in [n for n in txnPoolNodeSet if n != faulty_node]:
                # Each non faulty node raises suspicion
                assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
                # Ledger does not change
                assert node.domainLedger.size == ledger_sizes[node.name]

            assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

        looper.run(eventually(chk, retryWait=1))


    # Eventually another view change happens
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          retryWait=1, timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is no more the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primaries has a bad clock; it raises suspicions but orders
    requests after getting PREPAREs. Then a view change happens and this
    non-primary with the bad clock becomes the new primary, but it is not able
    to get any of its PRE-PREPAREs ordered. Eventually another view change
    happens, a new primary is elected and the pool is functional again.
    :return:
    """
    # The node having the bad clock, this node will be primary after view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)

    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)

    ledger_sizes = {
        node.name: node.domainLedger.size for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(
        node) for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is primary
    assert faulty_node.master_replica.isPrimary

    old_view_no = txnPoolNodeSet[0].viewNo

    # Delay instance change so view change doesn't happen in the middle of this test
    stashers = (n.nodeIbStasher for n in txnPoolNodeSet)
    with delay_rules(stashers, icDelay()):
        # Requests are sent
        for _ in range(5):
            sdk_send_random_requests(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_client,
                                     count=2)
            looper.runFor(2)

        def chk():
            for node in txnPoolNodeSet:
                assert node.viewNo == old_view_no

            for node in [n for n in txnPoolNodeSet if n != faulty_node]:
                # Each non faulty node raises suspicion
                assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
                # Ledger does not change
                assert node.domainLedger.size == ledger_sizes[node.name]

            assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

        looper.run(eventually(chk, retryWait=1))

    # Eventually another view change happens
    ensure_view_change(looper, txnPoolNodeSet)
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          retryWait=1, timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is no more the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)