def test_catchup_with_ledger_statuses_in_old_format_from_one_node(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward,
        tconf, tdir, allPluginsPath):
    """
    A node is restarted and, during catch-up, receives ledger statuses
    in an old format (without `protocolVersion`) from one of the nodes in the pool.
    The test verifies that the node successfully completes the catch-up and
    participates in ordering of further transactions.
    """
    node_to_restart = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    old_node = txnPoolNodeSet[0]

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)

    original_get_ledger_status = old_node.getLedgerStatus

    # Patch the getLedgerStatus method so that `old_node` sends its ledger
    # statuses in the old format (without `protocolVersion`)

    def get_ledger_status_without_protocol_version(ledgerId: int):
        original_ledger_status = original_get_ledger_status(ledgerId)
        return LedgerStatusInOldFormat(original_ledger_status.ledgerId,
                                       original_ledger_status.txnSeqNo,
                                       original_ledger_status.viewNo,
                                       original_ledger_status.ppSeqNo,
                                       original_ledger_status.merkleRoot)

    old_node.getLedgerStatus = get_ledger_status_without_protocol_version

    # restart node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_restart)
    looper.removeProdable(name=node_to_restart.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward,
                              2)

    # add `node_to_restart` to pool
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_restart
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Verify that `node_to_restart` successfully completes catch-up
    waitNodeDataEquality(looper, node_to_restart, *other_nodes)

    # Check that the ledger statuses from `old_node` are discarded for all ledgers
    assert countDiscarded(node_to_restart,
                          'replied message has invalid structure') >= 3

    # Verify that `node_to_restart` participates in ordering
    # of further transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    waitNodeDataEquality(looper, node_to_restart, *other_nodes)
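
# `LedgerStatusInOldFormat` is referenced above but not defined in this
# snippet. A minimal sketch of the idea, assuming the old format is simply the
# current LedgerStatus schema minus the `protocolVersion` field; the NamedTuple
# below is an illustrative stand-in, not plenum's actual message class.
from typing import NamedTuple, Optional

LedgerStatusInOldFormatSketch = NamedTuple('LedgerStatusInOldFormatSketch', [
    ('ledgerId', int),
    ('txnSeqNo', int),
    ('viewNo', Optional[int]),
    ('ppSeqNo', Optional[int]),
    ('merkleRoot', str),
])
# Because such a status lacks `protocolVersion`, a node validating the current
# schema discards it as having an invalid structure, which is what the
# `countDiscarded(...) >= 3` assertion above relies on (at least one discarded
# status per ledger).
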
def test_removed_replica_restored_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove a replica on some node which is not the master primary.
    2. Restart the node which was the master primary so far.
    3. Check that the nodes and replicas are correctly restored.
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = get_last_master_non_primary_node(txnPoolNodeSet)
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1

    node.replicas.remove_replica(instance_id)
    check_replica_removed(node, start_replicas_count, instance_id)

    # trigger view change on all nodes
    master_primary = get_master_primary_node(txnPoolNodeSet)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_primary)
    txnPoolNodeSet.remove(master_primary)
    looper.removeProdable(master_primary)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    restarted_node = start_stopped_node(master_primary, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    assert start_replicas_count == node.replicas.num_replicas
def test_removed_replica_restored_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove a replica on some node which is not the master primary.
    2. Restart the node which was the master primary so far.
    3. Check that the nodes and replicas are correctly restored.
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = get_last_master_non_primary_node(txnPoolNodeSet)
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1

    node.replicas.remove_replica(instance_id)
    check_replica_removed(node, start_replicas_count, instance_id)

    # trigger view change on all nodes
    master_primary = get_master_primary_node(txnPoolNodeSet)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_primary)
    txnPoolNodeSet.remove(master_primary)
    looper.removeProdable(master_primary)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    restarted_node = start_stopped_node(master_primary, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    assert start_replicas_count == node.replicas.num_replicas
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
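
# `check_replica_removed` is imported from the test helpers and is not shown
# here. A plausible sketch of what such a check could do, assuming that
# removing instance `instance_id` drops exactly that one backup replica
# (an assumption for illustration, not the helper's actual source):
def check_replica_removed_sketch(node, start_replicas_count, instance_id):
    # One replica fewer than before the removal ...
    assert node.replicas.num_replicas == start_replicas_count - 1
    # ... and the removed instance id is no longer registered on the node.
    assert instance_id not in node.replicas.keys()
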
def test_replica_removing_before_vc_with_primary_disconnected(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf,
        tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove a replica.
    2. Restart the master primary.
    3. Check that the nodes and replicas are correctly restored.
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = txnPoolNodeSet[0]
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1
    node.replicas.remove_replica(instance_id)
    _check_replica_removed(node, start_replicas_count, instance_id)
    assert not node.monitor.isMasterDegraded()
    assert len(node.requests) == 0
    # trigger view change on all nodes
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node)
    txnPoolNodeSet.remove(node)
    looper.removeProdable(node)
    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    assert start_replicas_count == node.replicas.num_replicas
def test_promotion_before_view_change(looper, txnPoolNodeSet, tdir, tconf,
                                      allPluginsPath, sdk_wallet_stewards,
                                      sdk_pool_handle):

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    assert txnPoolNodeSet[1].replicas[1].isPrimary
    assert txnPoolNodeSet[2].replicas[2].isPrimary
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    node_2 = txnPoolNodeSet[1]
    node_3 = txnPoolNodeSet[2]
    node_5 = txnPoolNodeSet[4]

    # Demote node 2
    steward_2 = sdk_wallet_stewards[1]
    demote_node(looper, steward_2, sdk_pool_handle, node_2)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_2)
    looper.removeProdable(node_2)
    txnPoolNodeSet.remove(node_2)

    # Checking that a view change happened.
    # We expect 2 view changes here: Beta is selected as master primary for
    # view=1 (the node registry from the beginning of view 0 is used to select
    # it), but it is not available (demoted), so the pool moves on to view=2
    # by timeout.
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=starting_view_number + 2)
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1])
    assert node_3.master_replica.isPrimary

    # Promote node 2 back, increasing the replica count
    node_2 = start_stopped_node(node_2, looper, tconf, tdir, allPluginsPath)
    promote_node(looper, steward_2, sdk_pool_handle, node_2)
    txnPoolNodeSet.append(node_2)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=starting_view_number + 3)
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1, 2])
    # node 5 is a primary since the promoted node is added at the end of the list
    assert node_5.master_replica.isPrimary

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
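
# The comments above reason about which node ends up as master primary. A
# simplified sketch of round-robin primary selection, assuming primaries are
# picked by walking the (sorted) node registry at an offset derived from the
# view number; plenum's real selection also uses the node registry captured at
# the start of the view, which is exactly what the comments above describe.
def select_primaries_sketch(node_reg, view_no, instance_count):
    # Instance 0 (master) starts at view_no % N; each backup instance takes
    # the next node in the registry.
    n = len(node_reg)
    return {inst_id: node_reg[(view_no + inst_id) % n]
            for inst_id in range(instance_count)}

# Example with the default 7-node naming: in view 1 the master primary would
# be the second node ('Beta'); if Beta has just been demoted, that view cannot
# be completed and the pool moves on to view 2, as described above.
# select_primaries_sketch(['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon',
#                          'Zeta', 'Eta'], view_no=1, instance_count=3)
# -> {0: 'Beta', 1: 'Gamma', 2: 'Delta'}
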
def test_replica_removing_with_primary_disconnected(looper,
                                                    txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client,
                                                    tconf,
                                                    tdir,
                                                    allPluginsPath):
    """
    1. Remove the backup primary node.
    2. Check that the replicas with the disconnected primary are removed.
    3. Recover the removed node.
    4. Start a view change.
    5. Check that all replicas are restored.
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = 1
    node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node)
    txnPoolNodeSet.remove(node)
    looper.removeProdable(node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes():
        for node in txnPoolNodeSet:
            check_replica_removed(node,
                                  start_replicas_count,
                                  instance_to_remove)
    looper.run(eventually(check_replica_removed_on_all_nodes,
                          timeout=tconf.TolerateBackupPrimaryDisconnection * 4))
    assert not node.monitor.isMasterDegraded()
    assert len(node.requests) == 0

    # recover the removed node
    node = start_stopped_node(node, looper, tconf,
                              tdir, allPluginsPath)
    txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # start View Change
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # check that all replicas were restored
    assert start_replicas_count == node.replicas.num_replicas
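
# `eventually` (from the plenum/stp test utilities) keeps re-running a check
# until it passes or a timeout expires, while the looper keeps prodding the
# nodes. A rough, self-contained approximation of that retry pattern,
# synchronous and simplified for illustration only (the real helper is
# coroutine-based and cooperates with the looper):
import time


def eventually_sketch(check, *args, timeout=10.0, interval=0.5):
    # Retry `check` until it stops raising or the deadline is reached.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check(*args)
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(interval)
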
def test_promotion_before_view_change(looper,
                                      txnPoolNodeSet,
                                      tdir,
                                      tconf,
                                      allPluginsPath,
                                      sdk_wallet_stewards,
                                      sdk_pool_handle):

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], 1)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    assert txnPoolNodeSet[1].replicas[1].isPrimary
    assert txnPoolNodeSet[2].replicas[2].isPrimary
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    node_2 = txnPoolNodeSet[1]
    node_3 = txnPoolNodeSet[2]
    node_4 = txnPoolNodeSet[3]

    # Demote node 2
    steward_2 = sdk_wallet_stewards[1]
    demote_node(looper, steward_2, sdk_pool_handle, node_2)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_2)
    looper.removeProdable(node_2)
    txnPoolNodeSet.remove(node_2)

    # Checking that view change happened
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=starting_view_number + 1)
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1])
    assert node_3.master_replica.isPrimary

    # Promote node 2 back, increasing the replica count
    node_2 = start_stopped_node(node_2, looper, tconf, tdir, allPluginsPath)
    promote_node(looper, steward_2, sdk_pool_handle, node_2)
    txnPoolNodeSet.append(node_2)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=starting_view_number + 3)
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1, 2])
    assert node_4.master_replica.isPrimary

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_promotion_leads_to_correct_primary_selection(looper, txnPoolNodeSet,
                                                      tdir, tconf,
                                                      allPluginsPath,
                                                      sdk_wallet_stewards,
                                                      sdk_pool_handle):
    # We save the pool state at the moment of the last view change and send it
    # to newly connected nodes so that they can restore primaries based on that
    # node set. When the current primaries change because of promotion/demotion,
    # this is not taken into account, which leads to primary inconsistency
    # across nodes.

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    assert txnPoolNodeSet[1].replicas._replicas[1].isPrimary
    assert txnPoolNodeSet[2].replicas._replicas[2].isPrimary
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    node_1 = txnPoolNodeSet[0]
    node_3 = txnPoolNodeSet[2]

    # Demote node 3
    steward_3 = sdk_wallet_stewards[2]
    demote_node(looper, steward_3, sdk_pool_handle, node_3)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_3)
    looper.removeProdable(node_3)
    txnPoolNodeSet.remove(node_3)

    # Checking that view change happened
    waitForViewChange(looper, txnPoolNodeSet, starting_view_number + 1)
    assert all(node.replicas.primary_name_by_inst_id ==
               node_1.replicas.primary_name_by_inst_id
               for node in txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 2)
    for node in txnPoolNodeSet:
        assert node.f == 1
        assert node.replicas.num_replicas == 2

    # restart Node1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_1)
    looper.removeProdable(node_1)
    txnPoolNodeSet.remove(node_1)

    node_1 = start_stopped_node(node_1, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node_1)

    # Wait so that node_1 can start and catch up
    waitForViewChange(looper, txnPoolNodeSet, starting_view_number + 1)
    assert all(node.replicas.primary_name_by_inst_id ==
               node_1.replicas.primary_name_by_inst_id
               for node in txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Promoting node 3, increasing replica count
    node_3 = start_stopped_node(node_3, looper, tconf, tdir, allPluginsPath)
    promote_node(looper, steward_3, sdk_pool_handle, node_3)
    txnPoolNodeSet.append(node_3)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Wait for view change after promotion
    waitForViewChange(looper, txnPoolNodeSet, starting_view_number + 2)
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1, 2])

    # Node 3 is able to do ordering
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
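
# The `node.f == 1` / `num_replicas == 2` assertions above follow from the
# usual BFT sizing used by plenum: with N nodes the pool tolerates
# f = (N - 1) // 3 faulty nodes and runs f + 1 protocol instances (one master
# plus f backups). A small sketch of that arithmetic, shown here only to
# explain the numbers in the test:
def pool_sizing_sketch(n_nodes):
    f = (n_nodes - 1) // 3      # tolerated faulty nodes
    instances = f + 1           # master instance + f backup instances
    return f, instances


assert pool_sizing_sketch(7) == (2, 3)   # the pool before the demotion
assert pool_sizing_sketch(6) == (1, 2)   # after one node has been demoted
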
def test_promotion_leads_to_primary_inconsistency(looper,
                                                  txnPoolNodeSet,
                                                  tdir,
                                                  tconf,
                                                  allPluginsPath,
                                                  sdk_wallet_stewards,
                                                  sdk_pool_handle):
    # We save the pool state at the moment of the last view change and send it
    # to newly connected nodes so that they can restore primaries based on that
    # node set. When the current primaries change because of promotion/demotion,
    # this is not taken into account, which leads to primary inconsistency
    # across nodes.

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], 1)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    assert txnPoolNodeSet[1].replicas._replicas[1].isPrimary
    assert txnPoolNodeSet[2].replicas._replicas[2].isPrimary
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Demote node 3
    node_3 = txnPoolNodeSet[2]

    steward_3 = sdk_wallet_stewards[2]
    demote_node(looper, steward_3, sdk_pool_handle, node_3)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_3)
    looper.removeProdable(node_3)
    txnPoolNodeSet.remove(node_3)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], 2)
    for node in txnPoolNodeSet:
        assert node.f == 1
        assert node.replicas.num_replicas == 2

    # Force a view change by stopping the master. At this moment the pool
    # state is saved (without the 3rd node)
    node_1 = txnPoolNodeSet[0]
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_1)
    looper.removeProdable(node_1)
    txnPoolNodeSet.remove(node_1)

    # Checking that view change happened
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1])
    view_number = checkViewNoForNodes(txnPoolNodeSet)
    assert view_number == starting_view_number + 1

    node_1 = start_stopped_node(node_1, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node_1)

    # Wait so that node_1 can start and finish the view change
    looper.runFor(1)

    # Promoting node 3, increasing replica count
    node_3 = start_stopped_node(node_3, looper, tconf, tdir, allPluginsPath)
    promote_node(looper, steward_3, sdk_pool_handle, node_3)
    txnPoolNodeSet.append(node_3)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet, instances_list=[0, 1, 2])

    # Node 3 is able to do ordering
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], 2)
    view_number = checkViewNoForNodes(txnPoolNodeSet)
    assert view_number == starting_view_number + 2
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # But it has a different primary, because it uses a nodeReg without
    # itself to calculate the primaries
    assert all(node.replicas.primary_name_by_inst_id ==
               node_1.replicas.primary_name_by_inst_id
               for node in txnPoolNodeSet if node is not node_3)
    # Fails
    assert all(node.replicas.primary_name_by_inst_id ==
               node_1.replicas.primary_name_by_inst_id
               for node in txnPoolNodeSet)
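
# `primary_name_by_inst_id` is compared across nodes above to detect the
# inconsistency described in the comments. Assuming it maps each instance id
# to that instance's primary (replica names follow the `<node name>:<inst_id>`
# convention), the consistency check boils down to a sketch like this:
def primaries_consistent_sketch(nodes):
    # Every node should have derived the same primary for every instance.
    reference = nodes[0].replicas.primary_name_by_inst_id
    return all(node.replicas.primary_name_by_inst_id == reference
               for node in nodes)

# Illustrative shape of the mapping for a 3-instance pool (values made up):
# {0: 'Beta:0', 1: 'Gamma:1', 2: 'Delta:2'}
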
def test_replica_removing_after_node_started(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_client, tconf, tdir,
                                             allPluginsPath,
                                             sdk_wallet_steward):
    """
    1. Remove the backup primary node.
    2. Check that the replicas with the disconnected primary are removed.
    3. Add a new node.
    4. Check that the replica with the disconnected primary is removed on the
    new node as well.
    5. Recover the removed node.
    6. Start a view change.
    7. Check that all replicas are restored.
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = txnPoolNodeSet[0].requiredNumberOfInstances - 1
    removed_primary_node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            removed_primary_node)
    txnPoolNodeSet.remove(removed_primary_node)
    looper.removeProdable(removed_primary_node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
        for node in txnPoolNodeSet:
            check_replica_removed(node, start_replicas_count, inst_id)
            assert not node.monitor.isMasterDegraded()
            assert len(node.requests) == 0

    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    new_steward_wallet, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, "test_steward",
        "test_node", tdir, tconf, allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    instance_to_remove -= 1
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=start_view_no + 1)
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    looper.run(
        eventually(check_replica_removed,
                   new_node,
                   start_replicas_count,
                   instance_to_remove,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # recover the removed node
    removed_primary_node = start_stopped_node(removed_primary_node, looper,
                                              tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(removed_primary_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # start View Change
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    ensureElectionsDone(
        looper=looper,
        nodes=txnPoolNodeSet,
        instances_list=range(txnPoolNodeSet[0].requiredNumberOfInstances),
        customTimeout=tconf.TolerateBackupPrimaryDisconnection * 2)
    assert start_replicas_count == removed_primary_node.replicas.num_replicas
def test_replica_removing_after_view_change(looper, txnPoolNodeSet,
                                            sdk_pool_handle, sdk_wallet_client,
                                            tconf, tdir, allPluginsPath):
    """
    1. Remove the backup primary node.
    2. Check that the replicas with the disconnected primary are removed.
    3. Start a view change.
    4. Check that the replica whose new primary is the disconnected node is
    removed and the other replicas keep working correctly.
    5. Recover the removed node.
    6. Start a view change.
    7. Check that all replicas are restored.
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = txnPoolNodeSet[0].requiredNumberOfInstances - 1
    removed_node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            removed_node)
    txnPoolNodeSet.remove(removed_node)
    looper.removeProdable(removed_node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
        for n in txnPoolNodeSet:
            check_replica_removed(n, start_replicas_count, inst_id)
            assert not n.monitor.isMasterDegraded()
            assert len(n.requests) == 0

    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # start View Change
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    instance_to_remove -= 1
    instances = list(range(txnPoolNodeSet[0].requiredNumberOfInstances))
    instances.remove(instance_to_remove)
    ensureElectionsDone(
        looper=looper,
        nodes=txnPoolNodeSet,
        instances_list=instances,
        customTimeout=tconf.TolerateBackupPrimaryDisconnection * 4)
    # check that the replica whose new primary is the disconnected node
    # is removed on all nodes after the view change
    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   instance_to_remove,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # recover the removed node
    removed_node = start_stopped_node(removed_node, looper, tconf, tdir,
                                      allPluginsPath)
    txnPoolNodeSet.append(removed_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # start View Change
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    ensureElectionsDone(
        looper=looper,
        nodes=txnPoolNodeSet,
        instances_list=range(txnPoolNodeSet[0].requiredNumberOfInstances),
        customTimeout=tconf.TolerateBackupPrimaryDisconnection * 2)
    assert start_replicas_count == removed_node.replicas.num_replicas