    async def test_slow_path_view_change(self, bft_network):
        """
        This test validates the BFT engine's transition to the slow path
        when the primary goes down. This effectively triggers a view change in the slow path.

        First we write a batch of known K/V entries.

        We check those entries have been processed via the fast commit path.

        We stop the primary and send a batch of requests, triggering slow path & view change.

        We bring the primary back up.

        We make sure the second batch of requests has been processed via the slow path.
        """

        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        for _ in range(10):
            await skvbc.write_known_kv()

        await bft_network.assert_fast_path_prevalent()

        bft_network.stop_replica(0)

        with trio.move_on_after(seconds=5):
            async with trio.open_nursery() as nursery:
                nursery.start_soon(skvbc.send_indefinite_write_requests)

        bft_network.start_replica(0)

        await bft_network.wait_for_slow_path_to_be_prevalent(as_of_seq_num=10)
    async def test_auto_vc_all_nodes_up_fast_path(self, bft_network, tracker):
        """
        This test aims to validate automatic view change
        while messages are being processed on the fast path
        1) Start a full BFT network
        2) Send a batch of write commands
        3) Make sure view change occurred at some point while processing the writes
        4) Check that all writes have been processed on the fast commit path
        5) Perform a "read-your-writes" check in the new view
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        initial_primary = 0

        for _ in range(150):
            key = skvbc.random_key()
            val = skvbc.random_value()
            await tracker.write_and_track_known_kv([(key, val)],
                                                   bft_network.random_client())

        await bft_network.wait_for_view(
            replica_id=random.choice(
                bft_network.all_replicas(without={initial_primary})),
            expected=lambda v: v > initial_primary,
            err_msg="Make sure automatic view change has occurred.")

        await skvbc.assert_kv_write_executed(key, val)
        await bft_network.assert_fast_path_prevalent()

        await tracker.tracked_read_your_writes()
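
The tracked read-your-writes call above goes through the SkvbcTracker. For reference, an untracked version of the same check can be pieced together from the write_req/read_req/parse_reply helpers that appear in the persistence example further down; a minimal sketch (the function name is hypothetical):

async def read_your_writes_sketch(skvbc, client):
    # Write a fresh key/value pair through the regular write path...
    key, value = skvbc.random_key(), skvbc.random_value()
    await client.write(skvbc.write_req([], [(key, value)], 0))
    # ...then read it back via the same client and compare.
    reply = await client.read(skvbc.read_req([key]))
    assert skvbc.parse_reply(reply) == {key: value}
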
Example #3
    async def _test_checkpoints_saved_and_transferred(self):
        for bft_config in bft.interesting_configs():
            config = bft.TestConfig(n=bft_config['n'],
                                    f=bft_config['f'],
                                    c=bft_config['c'],
                                    num_clients=bft_config['num_clients'],
                                    key_file_prefix=KEY_FILE_PREFIX,
                                    start_replica_cmd=start_replica_cmd)
            with bft.BftTestNetwork(config) as bft_network:
                skvbc = kvbc.SimpleKVBCProtocol(bft_network)
                stale_node = random.choice(
                    bft_network.all_replicas(without={0}))

                client, known_key, known_kv = \
                    await skvbc.prime_for_state_transfer(stale_nodes={stale_node})

                # Start the replica without any data, and wait for state transfer to
                # complete.
                bft_network.start_replica(stale_node)
                await bft_network.wait_for_state_transfer_to_start()
                up_to_date_node = 0
                await bft_network.wait_for_state_transfer_to_stop(
                    up_to_date_node, stale_node)

                bft_network.force_quorum_including_replica(stale_node)

                # Retrieve the value we put first to ensure state transfer worked
                # when the log went away
                kvpairs = await client.read([known_key])
                self.assertDictEqual(dict(known_kv), kvpairs)

                # Perform a put/get transaction pair to ensure we can read newly
                # written data after state transfer.
                await skvbc.assert_successful_put_get(self)
    async def _test_slow_to_fast_path_transition(self):
        for bft_config in bft.interesting_configs():
            config = bft.TestConfig(n=bft_config['n'],
                                    f=bft_config['f'],
                                    c=bft_config['c'],
                                    num_clients=bft_config['num_clients'],
                                    key_file_prefix=KEY_FILE_PREFIX,
                                    start_replica_cmd=start_replica_cmd)
            with bft.BftTestNetwork(config) as bft_network:
                await bft_network.init()
                bft_network.start_all_replicas()
                skvbc = kvbc.SimpleKVBCProtocol(bft_network)

                unstable_replicas = list(set(range(0, config.n)) - {0})
                crashed_replica = random.choice(unstable_replicas)
                bft_network.stop_replica(crashed_replica)

                for _ in range(10):
                    await skvbc.write_known_kv()

                await bft_network.assert_slow_path_prevalent(as_of_seq_num=1)

                bft_network.start_replica(crashed_replica)

                for _ in range(10):
                    key, val = await skvbc.write_known_kv()

                await bft_network.assert_fast_path_prevalent(
                    as_of_seq_num=10, nb_slow_paths_so_far=10)

                await skvbc.assert_kv_write_executed(key, val)
Example #5
    async def test_wedge_command_and_specific_replica_info(self, bft_network):
        """
             Sends a wedge command and check that the system stops from processing new requests.
             Note that in this test we assume no failures and synchronized network.
             The test does the following:
             1. A client sends a wedge command
             2. The client then sends a "Have you stopped" read only command such that each replica answers "I have stopped"
             3. The client validates with the metrics that all replicas have stopped
         """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        client = bft_network.random_client()

        await client.write(
            skvbc.write_req([], [], block_id=0, wedge_command=True))

        with trio.fail_after(seconds=90):
            done = False
            while done is False:
                msg = skvbc.get_have_you_stopped_req(n_of_n=1)
                rep = await client.read(
                    msg,
                    m_of_n_quorum=bft_client.MofNQuorum.All(
                        client.config,
                        [r for r in range(bft_network.config.n)]))
                rsi_rep = client.get_rsi_replies()
                done = True
                for r in rsi_rep.values():
                    if skvbc.parse_rsi_reply(rep, r) == 0:
                        done = False
                        break

        await self.validate_stop_on_super_stable_checkpoint(bft_network, skvbc)
    async def test_read_written_data_after_restart_of_all_nodes(
            self, bft_network):
        """
        This test aims to validate the blockchain is persistent
        (i.e. the data is still available after restarting all nodes)
        1) Write a key-value entry to the blockchain
        2) Restart all replicas (stop all, followed by start all)
        3) Verify the same key-value can be read from the blockchain
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        key = skvbc.random_key()
        value = skvbc.random_value()

        kv = (key, value)
        write_kv_msg = skvbc.write_req([], [kv], 0)

        client = bft_network.random_client()
        await client.write(write_kv_msg)

        bft_network.stop_all_replicas()
        bft_network.start_all_replicas()

        read_key_msg = skvbc.read_req([key])
        reply = await client.read(read_key_msg)

        kv_reply = skvbc.parse_reply(reply)

        self.assertEqual({key: value}, kv_reply)
Example #7
    async def wrapper(*args, **kwargs):
        bft_network = kwargs['bft_network']
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        init_state = skvbc.initial_state()
        tracker = SkvbcTracker(init_state, skvbc, bft_network)
        await async_fn(*args, **kwargs, tracker=tracker)
        await tracker.fill_missing_blocks_and_verify()
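
This fragment is the inner wrapper of a tracking decorator; the enclosing function is not shown in the excerpt. A plausible reconstruction is sketched below, assuming the kvbc and SkvbcTracker imports used elsewhere on this page; the decorator name with_skvbc_tracker is hypothetical, and only the wrapper body is taken from the fragment above.

import functools

def with_skvbc_tracker(async_fn):
    @functools.wraps(async_fn)
    async def wrapper(*args, **kwargs):
        # Build a tracker seeded with the initial blockchain state...
        bft_network = kwargs['bft_network']
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        tracker = SkvbcTracker(skvbc.initial_state(), skvbc, bft_network)
        # ...run the decorated test with the tracker injected as a keyword argument...
        await async_fn(*args, **kwargs, tracker=tracker)
        # ...and verify the tracked history once the test body completes.
        await tracker.fill_missing_blocks_and_verify()
    return wrapper

A test decorated this way receives tracker as a keyword argument, which matches the tracker parameter of the test methods shown elsewhere on this page.
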
    async def test_conflicting_requests_with_f_failures(
            self, bft_network, tracker):
        """
        Launch conflicting pre-process requests and make sure that conflicting requests are not committed
        """
        bft_network.start_all_replicas()
        await trio.sleep(SKVBC_INIT_GRACE_TIME)
        await bft_network.init_preexec_count()

        n = bft_network.config.n
        f = bft_network.config.f
        c = bft_network.config.c

        initial_primary = 0
        crashed_replicas = bft_network.random_set_of_replicas(
            f, without={initial_primary})
        bft_network.stop_replicas(replicas=crashed_replicas)

        read_client = bft_network.random_client()
        start_block = await tracker.get_last_block_id(read_client)

        ops = 50

        try:
            with trio.move_on_after(seconds=30):
                skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)
                await skvbc.run_concurrent_conflict_ops(ops, write_weight=1)
        except trio.TooSlowError:
            pass

        last_block = await tracker.get_last_block_id(read_client)

        # We produced at least one conflict.
        assert last_block < start_block + ops
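
The final assertion works because every committed write adds exactly one block: if none of the conflicting writes had been rejected, the chain would have grown by the full number of operations. A self-contained illustration of that arithmetic (it assumes run_concurrent_conflict_ops issues exactly `ops` write attempts, which is what the assertion above relies on):

def conflicts_detected(start_block, last_block, ops):
    # Every committed write adds one block, so the shortfall is the number of
    # writes rejected as conflicting.
    committed = last_block - start_block
    return ops - committed

assert conflicts_detected(start_block=100, last_block=130, ops=50) == 20
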
    async def issue_tracked_ops_to_the_system(self, bft_network, tracker):
        try:
            with trio.move_on_after(seconds=30):
                skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)
                await skvbc.run_concurrent_ops(50, write_weight=.70)
        except trio.TooSlowError:
            pass
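
A note on the two trio timeout idioms used throughout these examples: trio.move_on_after silently cancels its body when the deadline expires, while trio.fail_after raises trio.TooSlowError. A self-contained demonstration:

import trio

async def timeout_patterns_demo():
    # move_on_after: the body is cancelled at the deadline and execution continues.
    with trio.move_on_after(seconds=0.1):
        await trio.sleep(10)   # cancelled after 0.1s; no exception escapes the block

    # fail_after: the same situation raises trio.TooSlowError instead.
    try:
        with trio.fail_after(seconds=0.1):
            await trio.sleep(10)
    except trio.TooSlowError:
        pass

trio.run(timeout_patterns_demo)
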
Example #10
    async def _test_state_transfer(self):
        for bft_config in bft.interesting_configs():
            config = bft.TestConfig(n=bft_config['n'],
                                    f=bft_config['f'],
                                    c=bft_config['c'],
                                    num_clients=bft_config['num_clients'],
                                    key_file_prefix=KEY_FILE_PREFIX,
                                    start_replica_cmd=start_replica_cmd)
            with bft.BftTestNetwork(config) as bft_network:
                skvbc = kvbc.SimpleKVBCProtocol(bft_network)

                stale_node = random.choice(
                    bft_network.all_replicas(without={0}))

                await skvbc.prime_for_state_transfer(
                    stale_nodes={stale_node},
                    persistency_enabled=False
                )
                bft_network.start_replica(stale_node)
                await bft_network.wait_for_state_transfer_to_start()
                await bft_network.wait_for_state_transfer_to_stop(0, stale_node)
                await skvbc.assert_successful_put_get(self)
                random_replica = random.choice(
                    bft_network.all_replicas(without={0, stale_node}))
                bft_network.stop_replica(random_replica)
                await skvbc.assert_successful_put_get(self)
Example #11
    async def test_statuses(self, bft_network: 'BftTestNetwork',
                            is_status_valid: bool):
        """
        This test checks that statuses are returned when the request is valid and that correct
        error values are returned when statuses are invalid
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        # Make sure that the system is online
        await skvbc.send_n_kvs_sequentially(1)

        validation_functions = [
            self.validate_status_get_command,
            self.validate_status_describe_command
        ]
        if is_status_valid:
            statuses = self.run_ctl_command(['status',
                                             'list']).strip().split('\n')
            assert len(statuses), "There must be at least one registered status"

            # Include multi parameter commands validation
            status_combinations = [[status]
                                   for status in statuses] + [statuses]
            for status in status_combinations:
                for function in validation_functions:
                    function(status)
            return

        invalid_status = 'INVALID'
        for function in validation_functions:
            with self.assertRaises(AssertionError):
                function([invalid_status])
    async def test_view_changes_at_startup(self, bft_network, tracker):
        """
        This test aims to validate initial automatic view changes
        with an n-f startup.
        1) Start a BFT network excluding the replicas with ids 0 and 1
        2) Sleep for some time, so that the replicas time out connecting to the current primary
        3) Do nothing (wait for automatic view change to kick in)
        4) Check that two view changes have occurred
        5) Perform a "read-your-writes" check in the new view
        """
        exclude_replicas = {0,1}
        bft_network.start_replicas(bft_network.all_replicas(without=exclude_replicas))
        skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)

        timeout_for_primary_on_startup = 60
        await trio.sleep(timeout_for_primary_on_startup)
        
        final_view = 2
        # do nothing - just wait for view changes
        await bft_network.wait_for_view(
            replica_id=random.choice(
                bft_network.all_replicas(without=exclude_replicas)),
            expected=lambda v: v == final_view,
            err_msg="Make sure view changes have occurred."
        )
        current_primary = await bft_network.get_current_primary() 
        self.assertEqual(current_primary, final_view, "There should be exactly 2 view changes.")
        await skvbc.read_your_writes()
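
The equality between the current primary and the view number asserted above relies on round-robin primary rotation, i.e. the primary of view v is replica v % n. This is an assumption about the replica configuration, but it is consistent with the initial_primary = 0 / expected_next_primary = 1 pairs used in the single-view-change tests below.

def expected_primary_of_view(view, n):
    # Round-robin rotation (assumption): the primary of view v is replica v % n.
    return view % n

# In the test above, expected_primary_of_view(final_view, bft_network.config.n) == 2,
# because final_view = 2 is smaller than any valid cluster size n.
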
    async def test_single_vc_primary_isolated(self, bft_network):
        """
        The goal of this test is to check the view change
        workflow in case the primary is up, but its outgoing
        communication is intercepted by an adversary.

        1) Given a BFT network,
        2) Insert an adversary that isolates the primary's outgoing communication
        3) Send a batch of write requests.
        4) Verify the BFT network eventually transitions to the next view.
        """
        with net.PrimaryIsolatingAdversary(bft_network) as adversary:
            bft_network.start_all_replicas()
            skvbc = kvbc.SimpleKVBCProtocol(bft_network)

            initial_primary = 0
            await bft_network.wait_for_view_change(
                replica_id=initial_primary,
                expected=lambda v: v == initial_primary,
                err_msg="Make sure we are in the initial view "
                        "before isolating the primary."
            )

            await adversary.interfere()
            expected_next_primary = 1

            await self._send_random_writes(skvbc)

            await bft_network.wait_for_view_change(
                replica_id=random.choice(bft_network.all_replicas(without={0})),
                expected=lambda v: v == expected_next_primary,
                err_msg="Make sure view change has been triggered."
            )
    async def test_single_vc_only_primary_down(self, bft_network):
        """
        The goal of this test is to validate the most basic view change
        scenario - a single view change when the primary replica is down.

        1) Given a BFT network, we trigger parallel writes.
        2) Make sure the initial view is preserved during those writes.
        3) Stop the primary replica and send a batch of write requests.
        4) Verify the BFT network eventually transitions to the next view.
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        initial_primary = 0
        expected_next_primary = 1

        await self._send_random_writes(skvbc)

        await bft_network.wait_for_view_change(
            replica_id=initial_primary,
            expected=lambda v: v == initial_primary,
            err_msg="Make sure we are in the initial view "
                    "before crashing the primary."
        )

        bft_network.stop_replica(initial_primary)

        await self._send_random_writes(skvbc)

        await bft_network.wait_for_view_change(
            replica_id=random.choice(bft_network.all_replicas(without={0})),
            expected=lambda v: v == expected_next_primary,
            err_msg="Make sure view change has been triggered."
        )
    async def test_restart_non_primary_replica(self, bft_network, tracker):
        """
        1. Launch a cluster
        2. Make sure that the cluster works in the fast path mode
        3. Stop one non-primary replica
        4. Expected result: The cluster should switch to the slow path
        5. Restart the stopped replica
        6. Expected result: The cluster should switch to the fast path
        """

        initial_primary = 0
        skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)
        bft_network.start_all_replicas()

        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20,
                                                     write_weight=1),
            threshold=20)

        non_primary_replica = random.choice(
            bft_network.all_replicas(without={initial_primary}))

        bft_network.stop_replica(non_primary_replica)

        await bft_network.wait_for_slow_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20,
                                                     write_weight=1),
            threshold=20)

        bft_network.start_replica(non_primary_replica)

        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20,
                                                     write_weight=1),
            threshold=20)
Example #16
    async def test_long_time_executed_pre_process_request(
            self, bft_network, tracker):
        """
        Launch a pre-process request with a long execution time and ensure that the created blocks are as expected
        and no view change was triggered.
        """
        bft_network.start_all_replicas()
        await trio.sleep(SKVBC_INIT_GRACE_TIME)
        await bft_network.init_preexec_count()

        client = bft_network.random_client()
        client.config = client.config._replace(
            req_timeout_milli=LONG_REQ_TIMEOUT_MILLI, retry_timeout_milli=1000)
        skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)
        await skvbc.send_write_kv_set_batch(client,
                                            2,
                                            BATCH_SIZE,
                                            long_exec=True)

        last_block = await tracker.get_last_block_id(client)
        self.assertEqual(last_block, BATCH_SIZE)

        await bft_network.assert_successful_pre_executions_count(0, BATCH_SIZE)

        with trio.move_on_after(seconds=1):
            await skvbc.send_indefinite_ops(write_weight=1)

        initial_primary = 0
        with trio.move_on_after(seconds=15):
            while True:
                await bft_network.wait_for_view(
                    replica_id=initial_primary,
                    expected=lambda v: v == initial_primary,
                    err_msg="Make sure the view did not change.")
                await trio.sleep(seconds=5)
Example #17
    async def test_reload_slows_path_after_key_exchange(self, bft_network):

        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        bft_network.stop_replica(2)
        for i in range(20):
            await skvbc.send_write_kv_set()

        lastExecutedKey = ['replica', 'Gauges', 'lastExecutedSeqNum']
        lastExecutedValBefore = await bft_network.metrics.get(
            3, *lastExecutedKey)
        bft_network.stop_replica(3)
        bft_network.start_replica(3)
        for i in range(10):
            await skvbc.send_write_kv_set()

        with trio.fail_after(seconds=20):
            for replica_id in {3}:
                while True:
                    with trio.move_on_after(seconds=1):
                        try:
                            lastExecutedValAfter = await bft_network.metrics.get(
                                3, *lastExecutedKey)
                            if lastExecutedValAfter == lastExecutedValBefore:
                                continue
                        except trio.TooSlowError:
                            print(
                                f"Replica {replica_id} was not able to exchange keys on start"
                            )
                            self.assertTrue(False)
                        else:
                            break
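
The poll-until-changed loop above is a recurring pattern in these examples. A generic, self-contained sketch of the same idea (the helper name is hypothetical and not part of the Apollo framework; metrics.get is used exactly as in the test above):

import trio

async def wait_for_metric(metrics, replica_id, key, predicate, timeout_secs=20):
    # Sample a metric until the predicate holds, failing with TooSlowError at the deadline.
    with trio.fail_after(seconds=timeout_secs):
        while True:
            with trio.move_on_after(seconds=1):
                value = await metrics.get(replica_id, *key)
                if predicate(value):
                    return value
            await trio.sleep(0.5)

# Usage corresponding to the loop above (illustrative):
# await wait_for_metric(bft_network.metrics, 3,
#                       ['replica', 'Gauges', 'lastExecutedSeqNum'],
#                       lambda v: v != lastExecutedValBefore)
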
    async def test_fast_to_slow_path(self, bft_network, tracker):
        """
        This test aims to check the correct transitions from fast to slow commit path.

        First we write a series of K/V entries making sure we stay on the fast path.

        Once the first series of K/V writes have been processed we bring down C + 1
        replicas (more than what the fast path can tolerate), which should trigger a transition to the slow path.

        We send a new series of K/V writes and make sure they
        have been processed using the slow commit path.

        Finally the decorator verifies the KV execution.
        """

        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)

        # Initially all replicas are running on the fast path
        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=NUM_OPS,
                                                     write_weight=1),
            threshold=NUM_OPS)

        # Crash C+1 replicas excluding the primary - this ensures that the slow path will be used
        # without forcing a view change
        crash_targets = random.sample(bft_network.all_replicas(without={0}),
                                      bft_network.config.c + 1)
        bft_network.stop_replicas(crash_targets)

        await bft_network.wait_for_slow_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=NUM_OPS,
                                                     write_weight=1),
            threshold=NUM_OPS)
Example #19
    async def test_isolate_f_non_primaries_state_transfer(self, bft_network):
        """
        In this test we isolate f replicas long enough for the unaffected replicas to
        trigger a checkpoint. Then, once the adversary is not active anymore, we make
        sure the isolated replicas catch up via state transfer.
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        f = bft_network.config.f
        curr_primary = await bft_network.get_current_primary()
        isolated_replicas = bft_network.random_set_of_replicas(
            f, without={curr_primary})

        live_replicas = set(
            bft_network.all_replicas()) - set(isolated_replicas)

        # reach a checkpoint, despite the presence of an adversary
        with net.ReplicaSubsetIsolatingAdversary(
                bft_network, isolated_replicas) as adversary:
            adversary.interfere()

            await skvbc.fill_and_wait_for_checkpoint(
                initial_nodes=list(live_replicas),
                num_of_checkpoints_to_add=3,
                verify_checkpoint_persistency=False)

        # at this point the adversary is inactive, so the isolated replicas
        # should be able to catch-up via state transfer
        await bft_network.wait_for_state_transfer_to_start()

        # state transfer should complete on all isolated replicas
        for ir in isolated_replicas:
            await bft_network.wait_for_state_transfer_to_stop(0, ir)
Example #20
    async def test_integrity_check_validate_all_with_missing_block(
            self, bft_network):
        """
        Start all replicas, including the read-only replica (ROR)
        Generate a checkpoint
        Start the integrity check
        Delete a block
        Verify that the integrity check fails
        This test is executed only in S3 mode.
        """
        if not os.environ.get("CONCORD_BFT_MINIO_BINARY_PATH"):
            return

        bft_network.start_all_replicas()
        ro_replica_id = bft_network.config.n

        bft_network.start_replica(ro_replica_id)
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=bft_network.all_replicas(),
            num_of_checkpoints_to_add=1,
            verify_checkpoint_persistency=False,
            assert_state_transfer_not_started=False)

        await self._wait_for_st(bft_network, ro_replica_id)
        self._delete_block()
        keys_config = f"{KEY_FILE_PREFIX}{ro_replica_id}"
        s3_config = "test_s3_config_prefix.txt"
        # The integrity check is expected to fail because of the deleted block.
        with self.assertRaises(AssertionError):
            self._start_integrity_check(bft_network, keys_config, s3_config)
    async def _test_slow_path_view_change(self):
        for bft_config in bft.interesting_configs():
            config = bft.TestConfig(n=bft_config['n'],
                                    f=bft_config['f'],
                                    c=bft_config['c'],
                                    num_clients=bft_config['num_clients'],
                                    key_file_prefix=KEY_FILE_PREFIX,
                                    start_replica_cmd=start_replica_cmd)
            with bft.BftTestNetwork(config) as bft_network:
                await bft_network.init()
                bft_network.start_all_replicas()
                skvbc = kvbc.SimpleKVBCProtocol(bft_network)

                for _ in range(10):
                    await skvbc.write_known_kv()

                await bft_network.assert_fast_path_prevalent()

                bft_network.stop_replica(0)

                with trio.move_on_after(seconds=5):
                    async with trio.open_nursery() as nursery:
                        nursery.start_soon(
                            skvbc.send_indefinite_write_requests)

                bft_network.start_replica(0)

                await self._wait_for_slow_path_after_view_change(
                    bft_network, as_of_seq_num=10)
    async def test_ro_replica_start_simultaneously(self, bft_network, tracker):
        """
        Start up N of N regular replicas.
        Start read-only replica.
        Send client commands.
        Wait for State Transfer in ReadOnlyReplica to complete.
        """
        bft_network.start_all_replicas()
        # start the read-only replica
        ro_replica_id = bft_network.config.n
        bft_network.start_replica(ro_replica_id)
        # TODO replace the below function with the library function:
        # await tracker.skvbc.tracked_fill_and_wait_for_checkpoint(
        # initial_nodes=bft_network.all_replicas(),
        # num_of_checkpoints_to_add=1)
        with trio.fail_after(seconds=60):
            async with trio.open_nursery() as nursery:
                skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)
                nursery.start_soon(skvbc.send_indefinite_ops, .7, .1)
                while True:
                    with trio.move_on_after(seconds=.5):
                        try:
                            key = ['replica', 'Gauges', 'lastExecutedSeqNum']
                            lastExecutedSeqNum = await bft_network.metrics.get(ro_replica_id, *key)
                        except KeyError:
                            continue
                        else:
                            # success!
                            if lastExecutedSeqNum >= 150:
                                log.log_message(message_type="Replica" + str(ro_replica_id) + " : lastExecutedSeqNum:" + str(lastExecutedSeqNum))
                                nursery.cancel_scope.cancel()
    async def test_fast_to_slow_path(self, bft_network: 'BftTestNetwork',
                                     tracker: 'SkvbcTracker'):
        """
        This test aims to check the correct transitions from fast to slow commit path.

        First we write a series of K/V entries making sure we stay on the fast path.

        Once the first series of K/V writes have been processed we bring down C + 1
        replicas (more than what the fast path can tolerate), which should trigger a transition to the slow path.

        We send a new series of K/V writes and make sure they
        have been processed using the slow commit path.

        Finally the decorator verifies the KV execution.
        """
        # Need less than EVALUATION_PERIOD_SEQUENCES messages so that the commit path will not be set to
        # FAST_WITH_THRESHOLD when c>0
        op_count = int(EVALUATION_PERIOD_SEQUENCES * 0.1)
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network, tracker)

        # Crash C+1 replicas excluding the primary - this ensures that the slow path will be used
        # without forcing a view change
        crash_targets = random.sample(bft_network.all_replicas(without={0}),
                                      bft_network.config.c + 1)
        bft_network.stop_replicas(crash_targets)

        await bft_network.wait_for_consensus_path(
            path_type=ConsensusPathType.SLOW,
            run_ops=lambda: skvbc.send_n_kvs_sequentially(op_count),
            threshold=op_count)
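
The "C + 1" choice follows from the SBFT-style replica arithmetic these tests assume, n = 3f + 2c + 1: the optimistic fast path needs all n replicas, the FAST_WITH_THRESHOLD variant needs 3f + c + 1, and the slow path needs 2f + c + 1. Crashing c + 1 non-primary replicas therefore drops below both fast-path quorums while keeping the slow path live. A small illustrative check (the quorum sizes are stated here as assumptions about the protocol, not read from the test framework):

def quorum_status(f, c, crashed):
    n = 3 * f + 2 * c + 1
    alive = n - crashed
    return {
        "optimistic_fast": alive >= n,                  # needs every replica
        "fast_with_threshold": alive >= 3 * f + c + 1,
        "slow": alive >= 2 * f + c + 1,
    }

# With c + 1 crashes only the slow path keeps its quorum, e.g. for f=1, c=0:
assert quorum_status(f=1, c=0, crashed=1) == {
    "optimistic_fast": False, "fast_with_threshold": False, "slow": True}
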
Example #24
    async def test_wrong_time_in_non_primary_without_ts(self, bft_network, tracker): 
        """
        1. Launch a cluster
        2. Make sure that the cluster works in the fast path mode
        3. Change the local time in one of the non-primary replicas' containers so that it is behind or ahead of the other replicas' local time
        4. Expected result: the cluster should continue to work in the fast path, as the time service is disabled
        5. Change the time in that non-primary's container so that it matches the other replicas
        6. Expected result: no effect, the cluster should continue to work in the fast path
        """
        n = bft_network.config.n
        initial_primary = 0
        
        for replica_id in range(n):
            path = FILE_PATH_PREFIX + str(replica_id) + FILE_PATH_SUFFIX
            await self.manipulate_time_file_write(path, CLOCK_NO_DRIFT)
        
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network,tracker)
        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20, write_weight=1), threshold=20)
        initial_view = await bft_network.get_current_view()

        non_primary_replica = random.choice(
            bft_network.all_replicas(without={initial_primary})) 
        
        path = FILE_PATH_PREFIX + str(non_primary_replica) + FILE_PATH_SUFFIX 
        await self.manipulate_time_file_write(path, CLOCK_DRIFT)

        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20, write_weight=1), threshold=20)  

        await self.manipulate_time_file_write(path, CLOCK_NO_DRIFT)

        await bft_network.wait_for_fast_path_to_be_prevalent(
            run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20, write_weight=1), threshold=20) 
Example #25
    async def test_wedge_command(self, bft_network):
        """
             Sends a wedge command and check that the system stops from processing new requests.
             Note that in this test we assume no failures and synchronized network.
             The test does the following:
             1. A client sends a wedge command
             2. The client verify that the system reached to a super stable checkpoint
             3. The client tries to initiate a new write bft command and fails
         """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        client = bft_network.random_client()

        checkpoint_before = await bft_network.wait_for_checkpoint(replica_id=0)

        await client.write(skvbc.write_req([], [], block_id=0, wedge_command=True))

        for replica_id in range(bft_network.config.n):
            with trio.fail_after(seconds=30):
                while True:
                    with trio.move_on_after(seconds=1):
                        checkpoint_after = await bft_network.wait_for_checkpoint(replica_id=replica_id)
                        if checkpoint_after == checkpoint_before + 2:
                            break

        await self.validate_stop_on_super_stable_checkpoint(bft_network, skvbc)
    async def test_wedge_command_and_specific_replica_info(self, bft_network):
        """
             Sends a wedge command and check that the system stops from processing new requests.
             Note that in this test we assume no failures and synchronized network.
             The test does the following:
             1. A client sends a wedge command
             2. The client then sends a "Have you stopped" read only command such that each replica answers "I have stopped"
             3. The client validates with the metrics that all replicas have stopped
         """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        client = bft_network.random_client()
        # We increase the default request timeout because we need around 300 consensus
        # rounds, which may occasionally take more than 5 seconds.
        client.config = client.config._replace(req_timeout_milli=10000)

        op = operator.Operator(bft_network.config, client,  bft_network.builddir)
        await op.wedge()

        with trio.fail_after(seconds=90):
            done = False
            while done is False:
                await op.wedge_status()
                rsi_rep = client.get_rsi_replies()
                done = True
                for r in rsi_rep.values():
                    res = cmf_msgs.ReconfigurationResponse.deserialize(r)
                    status = res[0].response.stopped
                    if status is False:
                        done = False
                        break

        await self.validate_stop_on_super_stable_checkpoint(bft_network, skvbc)
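
The polling loop above flips done back to False as soon as one replica reports it has not stopped. A slightly more explicit helper with the same effect is sketched below; the function is hypothetical, and the ReconfigurationResponse layout (res[0].response.stopped) is taken from the loop above, assuming the cmf_msgs import used by these tests.

def count_stopped_replicas(rsi_replies):
    # Count the replica-specific (RSI) replies that report "stopped".
    stopped = 0
    for raw in rsi_replies.values():
        res = cmf_msgs.ReconfigurationResponse.deserialize(raw)
        if res[0].response.stopped:
            stopped += 1
    return stopped

# done = (count_stopped_replicas(client.get_rsi_replies()) == bft_network.config.n)
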
Example #27
    async def _test_read_written_data_after_restart_of_all_nodes(self):
        for bft_config in bft.interesting_configs():
            config = bft.TestConfig(n=bft_config['n'],
                                    f=bft_config['f'],
                                    c=bft_config['c'],
                                    num_clients=bft_config['num_clients'],
                                    key_file_prefix=KEY_FILE_PREFIX,
                                    start_replica_cmd=start_replica_cmd)
            with bft.BftTestNetwork(config) as bft_network:
                await bft_network.init()
                bft_network.start_all_replicas()
                skvbc = kvbc.SimpleKVBCProtocol(bft_network)

                key = skvbc.random_key()
                value = skvbc.random_value()

                kv = (key, value)
                write_kv_msg = skvbc.write_req([], [kv], 0)

                client = bft_network.random_client()
                await client.write(write_kv_msg)

                bft_network.stop_all_replicas()
                bft_network.start_all_replicas()

                read_key_msg = skvbc.read_req([key])
                reply = await client.read(read_key_msg)

                kv_reply = skvbc.parse_reply(reply)

                self.assertEqual({key: value}, kv_reply)
    async def test_pruning_command(self, bft_network):
        with log.start_action(action_type="test_pruning_command"):
            bft_network.start_all_replicas()
            skvbc = kvbc.SimpleKVBCProtocol(bft_network)
            client = bft_network.random_client()

            # Create 100 blocks in total; including the genesis block, we have 101 blocks
            k, v = await skvbc.write_known_kv()
            for i in range(99):
                v = skvbc.random_value()
                await client.write(skvbc.write_req([], [(k, v)], 0))

            # Get the minimal latest pruneable block among all replicas
            op = operator.Operator(bft_network.config, client,  bft_network.builddir)
            await op.latest_pruneable_block()

            latest_pruneable_blocks = []
            rsi_rep = client.get_rsi_replies()
            for r in rsi_rep.values():
                lpab = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
                latest_pruneable_blocks += [lpab.response]

            await op.prune(latest_pruneable_blocks)
            rsi_rep = client.get_rsi_replies()
            # we expect to have at least 2f + 1 replies
            for rep in rsi_rep:
                r = rsi_rep[rep]
                data = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
                pruned_block = int(data.additional_data.decode('utf-8'))
                assert pruned_block <= 90
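
The comment above states that at least 2f + 1 replies are expected, but the loop does not actually check the count. A self-contained way to make that expectation explicit (illustrative only; the test above does not perform this check):

def has_prune_quorum(num_replies, f):
    # At least 2f + 1 acknowledgements are expected, per the comment in the test above.
    return num_replies >= 2 * f + 1

# e.g. has_prune_quorum(len(rsi_rep), bft_network.config.f) inside the test:
assert has_prune_quorum(5, f=2)        # n = 3f + 2c + 1 = 7 with f = 2, c = 0: five replies suffice
assert not has_prune_quorum(4, f=2)
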
    async def test_ro_replica_with_s3_failures(self, bft_network):
        """
        Start all replicas.
        Stop S3 server.
        Start RO replica.
        After 5 secs start S3 server.
        Wait for State Transfer in ReadOnlyReplica to complete. This test is executed only in S3 mode.
        """
        if not os.environ.get("CONCORD_BFT_MINIO_BINARY_PATH"):
            return

        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        # start the read-only replica while the s3 service is down
        ro_replica_id = bft_network.config.n
        bft_network.start_replica(ro_replica_id)

        self.__class__._stop_s3_server()
        self.__class__._start_s3_after_X_secs(3)

        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=bft_network.all_replicas(),
            checkpoint_num=1,
            verify_checkpoint_persistency=False)

        await self._wait_for_st(bft_network, ro_replica_id)
    async def test_checkpoint_propagation_after_corrupting_checkpoint_msg_for_primary(self, bft_network):
        """
        This test verifies that the primary reaches the same checkpoint number as the rest of the `bft_network`,
        even though it sends incorrect data in its checkpoint messages.

        1) Start all replicas in the given `bft_network`
        2) Get the current primary and verify the assumption that it is 0
        3) Send enough requests to trigger 4 checkpoints
        4) Verify that all replicas reached checkpoint 4, including the primary.
        5) Verify that all honest replicas received 4 mismatched checkpoint messages.
        """
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)

        current_primary = await bft_network.get_current_primary()
        assert current_primary == 0, "Unexpected initial primary."

        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=bft_network.all_replicas(),
            num_of_checkpoints_to_add=4,
            verify_checkpoint_persistency=False
        )

        key1 = ['checkpoint_msg', 'Counters', 'number_of_checkpoint_mismatch']
        key2 = ['replica', 'Gauges', 'lastExecutedSeqNum']
        for replica_id in bft_network.all_replicas():
            last_executed_seq_num = await bft_network.metrics.get(replica_id, *key2)
            assert last_executed_seq_num >= 600, \
                f"Replica {replica_id} last_executed_seq_num={last_executed_seq_num} < 600"
            if replica_id != current_primary:
                number_of_checkpoint_mismatch = await bft_network.metrics.get(replica_id, *key1)
                assert number_of_checkpoint_mismatch == 4, \
                    f"Replica {replica_id} number_of_checkpoint_mismatch={number_of_checkpoint_mismatch} != 4"