def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        test_add_node_from_snapshot(network, args)
        test_add_node_from_snapshot(network, args, from_backup=True)
        test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)

        latest_node_log = network.get_joined_nodes()[-1].remote.log_path()
        with open(latest_node_log, "r+") as log:
            assert any(
                "No snapshot found: Node will replay all historical transactions" in l
                for l in log.readlines()
            ), "New nodes shouldn't join from snapshot if snapshot evidence cannot be verified"

        test_node_filter(network, args)

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)
            try:
                test_add_node_from_snapshot(
                    network, args, copy_ledger_read_only=False
                )
                assert (
                    False
                ), "Node added from snapshot without ledger should not be able to verify historical entries"
            except app.LoggingTxsVerifyException:
                pass

def run(args):
    hosts = ["localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        txs = app.LoggingTxs()
        txs.issue(
            network=network,
            number_txs=3,
            consensus=args.consensus,
        )
        txs.verify(network=network)

        network = test(network, args)

        txs.issue(
            network=network,
            number_txs=3,
            consensus=args.consensus,
        )
        txs.verify(network=network)

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        for i in range(args.recovery):
            # Alternate between recovery with primary change and stable primary-ship,
            # with and without snapshots
            if i % 2 == 0:
                recovered_network = test_share_resilience(
                    network, args, from_snapshot=True
                )
            else:
                recovered_network = test(network, args, from_snapshot=False)
            network.stop_all_nodes()
            network = recovered_network

        LOG.success("Recovery complete on all nodes")

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)
            errors, _ = network.get_joined_nodes()[-1].stop()
            if not any(
                "No snapshot found: Node will request all historical transactions" in s
                for s in errors
            ):
                raise ValueError(
                    "New node shouldn't join from snapshot if snapshot cannot be verified"
                )

def run_corrupted_ledger(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_open(args)

        network = test_recover_service_truncated_ledger(
            network, args, corrupt_first_tx=True
        )
        network = test_recover_service_truncated_ledger(
            network, args, corrupt_last_tx=True
        )
        network = test_recover_service_truncated_ledger(
            network, args, corrupt_first_sig=True
        )

        network.stop_all_nodes()

        # Make sure ledger can be read once recovered (i.e. ledger corruption
        # does not affect recovered ledger)
        for node in network.nodes:
            ledger = ccf.ledger.Ledger(
                node.remote.ledger_paths(), committed_only=False
            )
            _, last_seqno = ledger.get_latest_public_state()
            LOG.info(
                f"Successfully read ledger for node {node.local_node_id} up to seqno {last_seqno}"
            )

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        for i in range(args.recovery):
            # Issue transactions which will require historical ledger queries
            # once the network is shut down and recovered
            network.txs.issue(network, number_txs=1)
            network.txs.issue(network, number_txs=1, repeat=True)

            # Alternate between recovery with primary change and stable primary-ship,
            # with and without snapshots
            if i % 2 == 0:
                recovered_network = test_share_resilience(
                    network, args, from_snapshot=True
                )
            else:
                recovered_network = test(network, args, from_snapshot=False)
            network = recovered_network

        LOG.success("Recovery complete on all nodes")

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        network = test(
            network,
            args,
            verify=args.package != "libjs_generic",
        )
        network = test_illegal(network, args, verify=args.package != "libjs_generic")
        network = test_large_messages(network, args)
        network = test_remove(network, args)
        network = test_forwarding_frontends(network, args)
        network = test_user_data_ACL(network, args)
        network = test_cert_prefix(network, args)
        network = test_anonymous_caller(network, args)
        network = test_raw_text(network, args)
        network = test_historical_query(network, args)
        network = test_view_history(network, args)
        network = test_primary(network, args)
        network = test_metrics(network, args)
        network = test_memory(network, args)

def test_liveness(network, args):
    txs = app.LoggingTxs()
    txs.issue(
        network=network,
        number_txs=3,
    )
    txs.verify()
    return network

def run_join_old_snapshot(args):
    txs = app.LoggingTxs("user0")
    nodes = ["local://localhost"]

    with tempfile.TemporaryDirectory() as tmp_dir:
        with infra.network.network(
            nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            network.start_and_open(args)
            primary, _ = network.find_primary()

            # First, retrieve and save one committed snapshot
            txs.issue(network, number_txs=args.snapshot_tx_interval)
            old_committed_snapshots = network.get_committed_snapshots(primary)
            copy(
                os.path.join(
                    old_committed_snapshots, os.listdir(old_committed_snapshots)[0]
                ),
                tmp_dir,
            )

            # Then generate another newer snapshot, and add two more nodes from it
            txs.issue(network, number_txs=args.snapshot_tx_interval)

            for _ in range(0, 2):
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                )
                network.trust_node(new_node, args)

            # Kill primary and wait for a new one: the new primary is
            # guaranteed to have started from the new snapshot
            primary.stop()
            network.wait_for_new_primary(primary)

            # Start new node from the old snapshot
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                    snapshots_dir=tmp_dir,
                    timeout=3,
                )
            except infra.network.StartupSnapshotIsOld:
                pass

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        network = test(
            network,
            args,
            verify=args.package != "libjs_generic",
        )
        network = test_illegal(network, args, verify=args.package != "libjs_generic")
        network = test_large_messages(network, args)
        network = test_remove(network, args)
        network = test_clear(network, args)
        network = test_record_count(network, args)
        network = test_forwarding_frontends(network, args)
        network = test_signed_escapes(network, args)
        network = test_user_data_ACL(network, args)
        network = test_cert_prefix(network, args)
        network = test_anonymous_caller(network, args)
        network = test_multi_auth(network, args)
        network = test_custom_auth(network, args)
        network = test_custom_auth_safety(network, args)
        network = test_raw_text(network, args)
        network = test_historical_query(network, args)
        network = test_historical_query_range(network, args)
        network = test_view_history(network, args)
        network = test_primary(network, args)
        network = test_network_node_info(network, args)
        network = test_metrics(network, args)
        network = test_memory(network, args)

        # BFT does not handle re-keying yet
        if args.consensus == "cft":
            network = test_liveness(network, args)
            network = test_rekey(network, args)
            network = test_liveness(network, args)

        if args.package == "liblogging":
            network = test_ws(network, args)
            network = test_receipts(network, args)
            network = test_historical_receipts(network, args)

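# A minimal invocation sketch for the run() entrypoints above. This is an
# illustration only: infra.e2e_args.cli_args() and the "liblogging" package
# name are assumptions about the surrounding test harness, not taken from
# these snippets.
if __name__ == "__main__":
    args = infra.e2e_args.cli_args()  # assumed helper: parses the common test CLI
    args.package = "liblogging"  # assumed: the sample logging app under test
    args.nodes = ["local://localhost"] * 3  # three local nodes, as used elsewhere in this section
    run(args)
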
def test(network, args, notifications_queue=None, verify=True):
    txs = app.LoggingTxs(notifications_queue=notifications_queue)
    txs.issue(network=network, number_txs=1, wait_for_sync=args.consensus == "raft")
    txs.issue(
        network=network,
        number_txs=1,
        on_backup=True,
        wait_for_sync=args.consensus == "raft",
    )
    if verify:
        txs.verify(network)
    else:
        LOG.warning("Skipping log messages verification")
    return network

def test_user(network, args, notifications_queue=None, verify=True):
    primary, _ = network.find_nodes()
    new_user_id = 3
    network.create_users([new_user_id], args.participants_curve)
    network.consortium.add_user(primary, new_user_id)
    txs = app.LoggingTxs(notifications_queue=notifications_queue, user_id=new_user_id)
    txs.issue(
        network=network,
        number_txs=1,
        consensus=args.consensus,
    )
    if verify:
        txs.verify(network)
    network.consortium.remove_user(primary, new_user_id)
    with primary.client(f"user{new_user_id}") as c:
        r = c.get("/app/log/private")
        assert r.status_code == http.HTTPStatus.FORBIDDEN.value
    return network

def run_file_operations(args):
    with tempfile.TemporaryDirectory() as tmp_dir:
        txs = app.LoggingTxs("user0")
        with infra.network.network(
            args.nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            args.common_read_only_ledger_dir = tmp_dir
            network.start_and_join(args)
            test_save_committed_ledger_files(network, args)
            test_parse_snapshot_file(network, args)

def test(network, args, notifications_queue=None, verify=True):
    txs = app.LoggingTxs(notifications_queue=notifications_queue)
    txs.issue(network=network, number_txs=1, wait_for_sync=args.consensus == "raft")
    txs.issue(
        network=network,
        number_txs=1,
        on_backup=True,
        wait_for_sync=args.consensus == "raft",
    )
    # TODO: Once the JS app supports both public and private tables, always verify
    if verify:
        txs.verify(network)
    else:
        LOG.warning("Skipping log messages verification")
    return network

def run(args):
    hosts = ["localhost", "localhost"]
    txs = app.LoggingTxs()

    with infra.ccf.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)

        for _ in range(args.recovery):
            recovered_network = test(network, args)
            network.stop_all_nodes()
            network = recovered_network

        LOG.success("Recovery complete on all nodes")

def test(network, args, verify=True):
    txs = app.LoggingTxs()
    txs.issue(
        network=network,
        number_txs=1,
        consensus=args.consensus,
    )
    txs.issue(
        network=network,
        number_txs=1,
        on_backup=True,
        consensus=args.consensus,
    )
    if verify:
        txs.verify(network)
    else:
        LOG.warning("Skipping log messages verification")
    return network

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
        init_partitioner=True,
    ) as network:
        network.start_and_join(args)

        # test_invalid_partitions(network, args)
        test_partition_majority(network, args)
        test_isolate_primary_from_one_backup(network, args)
        for _ in range(5):
            test_isolate_and_reconnect_primary(network, args)

def test_user(network, args, verify=True):
    # Note: This test should not be chained in the test suite as it creates
    # a new user and uses its own LoggingTxs
    primary, _ = network.find_nodes()
    new_user_local_id = "user3"
    new_user = network.create_user(new_user_local_id, args.participants_curve)
    user_data = {"lifetime": "temporary"}
    network.consortium.add_user(primary, new_user.local_id, user_data)
    txs = app.LoggingTxs(user_id=new_user.local_id)
    txs.issue(
        network=network,
        number_txs=1,
    )
    if verify:
        txs.verify()
    network.consortium.remove_user(primary, new_user.service_id)
    with primary.client(new_user_local_id) as c:
        r = c.get("/app/log/private")
        assert r.status_code == http.HTTPStatus.UNAUTHORIZED.value
    return network

def save_committed_ledger_files(network, args):
    txs = app.LoggingTxs()
    # Issue txs in a loop to force a signature and a new ledger chunk
    # each time. Record log messages at the same key (repeat=True) so
    # that CCF makes use of historical queries when verifying messages
    for _ in range(1, 5):
        txs.issue(network, 1, repeat=True)

    LOG.info(f"Moving committed ledger files to {args.common_read_only_ledger_dir}")
    primary, _ = network.find_primary()
    for l in os.listdir(primary.remote.ledger_path()):
        if infra.node.is_file_committed(l):
            shutil.move(
                os.path.join(primary.remote.ledger_path(), l),
                os.path.join(args.common_read_only_ledger_dir, l),
            )

    txs.verify(network)
    return network

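# For context, a minimal sketch of the committed-file check used above, assuming
# CCF's convention that sealed ledger chunks carry a ".committed" suffix. The
# real helper is infra.node.is_file_committed; this stand-in is illustrative only.
def is_file_committed_sketch(file_name: str) -> bool:
    # Committed chunks are immutable, which is why it is safe to move them
    # into a read-only ledger directory
    return file_name.endswith(".committed")
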
def run(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
        init_partitioner=True,
    ) as network:
        network.start_and_open(args)

        test_invalid_partitions(network, args)
        test_partition_majority(network, args)
        test_isolate_primary_from_one_backup(network, args)
        test_new_joiner_helps_liveness(network, args)
        for n in range(5):
            test_isolate_and_reconnect_primary(network, args, iteration=n)

def run(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_open(args)

        test_version(network, args)

        if args.consensus != "bft":
            test_join_straddling_primary_replacement(network, args)
            test_node_replacement(network, args)
            test_add_node_from_backup(network, args)
            test_add_node(network, args)
            test_add_node_on_other_curve(network, args)
            test_retire_backup(network, args)
            test_add_as_many_pending_nodes(network, args)
            test_add_node(network, args)
            test_retire_primary(network, args)
            test_add_node_with_read_only_ledger(network, args)

            test_add_node_from_snapshot(network, args)
            test_add_node_from_snapshot(network, args, from_backup=True)
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)

            test_node_filter(network, args)
            test_retiring_nodes_emit_at_most_one_signature(network, args)

        if args.reconfiguration_type == "TwoTransaction":
            test_learner_catches_up(network, args)

        test_service_config_endpoint(network, args)
        test_node_certificates_validity_period(network, args)
        test_add_node_invalid_validity_period(network, args)

def run(args):
    hosts = ["localhost", "localhost", "localhost"]
    txs = app.LoggingTxs()

    with infra.ccf.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)

        for i in range(args.recovery):
            # Alternate between recovery with primary change and stable primary-ship
            if i % 2 == 0:
                recovered_network = test_share_resilience(network, args)
            else:
                recovered_network = test(network, args)
            network.stop_all_nodes()
            network = recovered_network

        LOG.success("Recovery complete on all nodes")

def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)

def run(args):
    hosts = ["localhost", "localhost"]
    txs = app.LoggingTxs()

    with infra.ccf.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)

        if args.use_shares:
            network.consortium.store_current_network_encryption_key()

        for _ in range(args.recovery):
            recovered_network = test(network, args, use_shares=args.use_shares)
            network.stop_all_nodes()
            network = recovered_network

        LOG.success("Recovery complete on all nodes")

def test_illegal(network, args, notifications_queue=None, verify=True):
    # Send malformed HTTP traffic and check the connection is closed
    cafile = os.path.join(network.common_dir, "networkcert.pem")
    context = ssl.create_default_context(cafile=cafile)
    context.set_ecdh_curve(infra.clients.get_curve(cafile).name)
    context.load_cert_chain(
        certfile=os.path.join(network.common_dir, "user0_cert.pem"),
        keyfile=os.path.join(network.common_dir, "user0_privk.pem"),
    )
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn = context.wrap_socket(
        sock, server_side=False, server_hostname=network.nodes[0].host
    )
    conn.connect((network.nodes[0].host, network.nodes[0].rpc_port))
    conn.sendall(b"NOTAVERB ")
    rv = conn.recv(1024)
    assert rv == b"", rv

    # Valid transactions are still accepted
    txs = app.LoggingTxs(notifications_queue=notifications_queue)
    txs.issue(
        network=network,
        number_txs=1,
        consensus=args.consensus,
    )
    txs.issue(
        network=network,
        number_txs=1,
        on_backup=True,
        consensus=args.consensus,
    )
    if verify:
        txs.verify(network)
    else:
        LOG.warning("Skipping log messages verification")
    return network

def run_file_operations(args):
    with tempfile.TemporaryDirectory() as tmp_dir:
        txs = app.LoggingTxs("user0")
        with infra.network.network(
            args.nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            args.common_read_only_ledger_dir = tmp_dir
            network.start_and_open(args)

            test_save_committed_ledger_files(network, args)
            test_parse_snapshot_file(network, args)
            test_forced_ledger_chunk(network, args)
            test_forced_snapshot(network, args)

            primary, _ = network.find_primary()
            network.stop_all_nodes()

            test_split_ledger_on_stopped_network(primary, args)

def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        txs = app.LoggingTxs()
        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()

        network = test(network, args)

        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()

def run(args):
    s.validate_tests_signature(s.tests)

    if args.enforce_reqs is False:
        LOG.warning("Test requirements will be ignored")

    hosts = ["localhost", "localhost"]
    txs = app.LoggingTxs()
    network = infra.ccf.Network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, txs=txs
    )
    network.start_and_join(args)

    LOG.info(f"Running {len(s.tests)} tests for {args.test_duration} seconds")

    run_tests = {}
    success = True
    elapsed = args.test_duration

    for i, test in enumerate(s.tests):
        status = None
        reason = None

        if elapsed <= 0:
            LOG.warning(f"Test duration time ({args.test_duration} seconds) is up!")
            break

        try:
            LOG.debug(f"Running {s.test_name(test)}...")
            test_time_before = time.time()

            # Actually run the test
            new_network = test(network, args)
            status = TestStatus.success
        except reqs.TestRequirementsNotMet as ce:
            LOG.warning(f"Test requirements for {s.test_name(test)} not met")
            status = TestStatus.skipped
            reason = str(ce)
            new_network = network
        except Exception:
            LOG.exception(f"Test {s.test_name(test)} failed")
            status = TestStatus.failure
            new_network = network

        test_elapsed = time.time() - test_time_before

        # Construct test report
        run_tests[i] = {
            "name": s.test_name(test),
            "status": status.name,
            "elapsed (s)": round(test_elapsed, 2),
        }

        if reason is not None:
            run_tests[i]["reason"] = reason

        # If the test function did not return a network, it is not possible to continue
        if new_network is None:
            raise ValueError(f"Network returned by {s.test_name(test)} is None")

        # If the network was changed (e.g. recovery test), stop the previous network
        # and use the new network from now on
        if new_network != network:
            network.stop_all_nodes()
            network = new_network

        LOG.debug(f"Test {s.test_name(test)} took {test_elapsed:.2f} secs")

        # For now, if a test fails, the entire test suite is stopped
        if status is TestStatus.failure:
            success = False
            break

        elapsed -= test_elapsed

    network.stop_all_nodes()

    LOG.success(f"Ran {len(run_tests)}/{len(s.tests)} tests:")
    LOG.success(f"\n{json.dumps(run_tests, indent=4)}")

    if not success:
        sys.exit(1)

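# The suite runner above references TestStatus.success / .skipped / .failure and
# reads .name for the report. A minimal sketch of such an enum, assuming the real
# definition lives in the suite module rather than in this snippet:
from enum import Enum, auto

class TestStatus(Enum):
    success = auto()  # test ran to completion and its assertions passed
    skipped = auto()  # test requirements were not met, so it did not run
    failure = auto()  # test raised an unexpected exception
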
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    LOG.info(f"Setting seed to {args.seed}")
    random.seed(args.seed)
    txs = app.LoggingTxs()

    with infra.ccf.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)
        original_nodes = network.get_joined_nodes()
        view_info = {}

        suspend.update_view_info(network, view_info)
        app.test_run_txs(network=network, args=args, num_txs=TOTAL_REQUESTS)
        suspend.test_suspend_nodes(network, args)

        # Run txs while nodes get suspended
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=4 * TOTAL_REQUESTS,
            ignore_failures=True,
        )
        suspend.update_view_info(network, view_info)

        late_joiner = network.create_and_trust_node(args.package, "localhost", args)

        # Some requests to be processed while the late joiner catches up
        # (no strict checking that these requests are actually being processed
        # simultaneously with the node catchup)
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=int(TOTAL_REQUESTS / 2),
            nodes=original_nodes,  # doesn't contain the late joiner
            verify=False,  # would try to verify on the late joiner, which might not be ready yet
        )

        caught_up = suspend.wait_for_late_joiner(original_nodes[0], late_joiner)
        if caught_up == suspend.LateJoinerStatus.Stuck:
            # Should be removed once node configuration allows a late joiner
            # to force a view change
            LOG.warning("Late joiner is stuck; stop trying if catchup fails again")
            suspend.wait_for_late_joiner(original_nodes[0], late_joiner, True)
        elif caught_up == suspend.LateJoinerStatus.NotReady:
            while caught_up == suspend.LateJoinerStatus.NotReady:
                LOG.warning("Late joiner is not ready to accept RPCs yet")
                caught_up = suspend.wait_for_late_joiner(
                    original_nodes[0], late_joiner
                )
        elif caught_up == suspend.LateJoinerStatus.Ready:
            LOG.success("Late joiner caught up successfully")

        # Check that nodes have resumed normal execution before shutting down
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=len(network.get_joined_nodes()),
            timeout=30,
            ignore_failures=True,
        )

        # Assert that view changes actually did occur
        assert len(view_info) > 1

        LOG.success("----------- views and primaries recorded -----------")
        for view, primary in view_info.items():
            LOG.success(f"view {view} - primary {primary}")