def test_add_node_from_backup(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node, args.package, args, target_node=network.find_any_backup()
    )
    network.trust_node(new_node, args)
    return network
def test_node_data(network, args):
    with tempfile.NamedTemporaryFile(mode="w+") as ntf:
        primary, _ = network.find_primary()
        with primary.client() as c:

            def get_nodes():
                r = c.get("/node/network/nodes")
                assert r.status_code == 200, (r.status_code, r.body.text())
                return {
                    node_info["node_id"]: node_info
                    for node_info in r.body.json()["nodes"]
                }

            new_node_data = {"my_id": "0xdeadbeef", "location": "The Moon"}
            json.dump(new_node_data, ntf)
            ntf.flush()
            untrusted_node = network.create_node(
                infra.interfaces.HostSpec(
                    rpc_interfaces={
                        infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                            endorsement=infra.interfaces.Endorsement(
                                authority=infra.interfaces.EndorsementAuthority.Node
                            )
                        )
                    }
                ),
                node_data_json_file=ntf.name,
            )

            # NB: This new node joins but is never trusted
            network.join_node(untrusted_node, args.package, args)

            nodes = get_nodes()
            assert untrusted_node.node_id in nodes, nodes
            new_node_info = nodes[untrusted_node.node_id]
            assert new_node_info["node_data"] == new_node_data, new_node_info

            # Set modified node data
            new_node_data["previous_locations"] = [new_node_data["location"]]
            new_node_data["location"] = "Secret Base"
            network.consortium.set_node_data(
                primary, untrusted_node.node_id, new_node_data
            )

            nodes = get_nodes()
            assert untrusted_node.node_id in nodes, nodes
            new_node_info = nodes[untrusted_node.node_id]
            assert new_node_info["node_data"] == new_node_data, new_node_info

            # Set modified node data on trusted primary
            primary_node_data = "Some plain JSON string"
            network.consortium.set_node_data(
                primary, primary.node_id, primary_node_data
            )

            nodes = get_nodes()
            assert primary.node_id in nodes, nodes
            primary_node_info = nodes[primary.node_id]
            assert (
                primary_node_info["node_data"] == primary_node_data
            ), primary_node_info

    return network
def test_add_node_with_bad_code(network, args):
    if args.enclave_type == "virtual":
        LOG.warning("Skipping test_add_node_with_bad_code with virtual enclave")
        return network

    replacement_package = (
        "samples/apps/logging/liblogging"
        if args.package == "libjs_generic"
        else "libjs_generic"
    )

    new_code_id = infra.utils.get_code_id(
        args.enclave_type, args.oe_binary, replacement_package
    )

    LOG.info(f"Adding a node with unsupported code id {new_code_id}")
    code_not_found_exception = None
    try:
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, replacement_package, args, timeout=3)
    except infra.network.CodeIdNotFound as err:
        code_not_found_exception = err

    assert (
        code_not_found_exception is not None
    ), f"Adding a node with unsupported code id {new_code_id} should fail"

    return network
def test_node_filter(network, args):
    primary, _ = network.find_primary_and_any_backup()
    with primary.client() as c:

        def get_nodes(status):
            r = c.get(f"/node/network/nodes?status={status}")
            nodes = r.body.json()["nodes"]
            return sorted(nodes, key=lambda node: node["node_id"])

        trusted_before = get_nodes("Trusted")
        pending_before = get_nodes("Pending")
        retired_before = get_nodes("Retired")
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, target_node=primary)
        trusted_after = get_nodes("Trusted")
        pending_after = get_nodes("Pending")
        retired_after = get_nodes("Retired")

        assert trusted_before == trusted_after, (trusted_before, trusted_after)
        assert len(pending_before) + 1 == len(pending_after), (
            pending_before,
            pending_after,
        )
        assert retired_before == retired_after, (retired_before, retired_after)

        assert all(info["status"] == "Trusted" for info in trusted_after), trusted_after
        assert all(info["status"] == "Pending" for info in pending_after), pending_after
        assert all(info["status"] == "Retired" for info in retired_after), retired_after

    return network
def test_add_as_many_pending_nodes(network, args):
    # Killing pending nodes should not change the raft consensus rules
    primary, _ = network.find_primary()
    number_new_nodes = len(network.nodes)
    LOG.info(
        f"Adding {number_new_nodes} pending nodes - consensus rules should not change"
    )

    new_nodes = []
    for _ in range(number_new_nodes):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_nodes.append(new_node)

    for new_node in new_nodes:
        new_node.stop()

    # Even though pending nodes (half the number of nodes) are stopped,
    # service can still make progress
    check_can_progress(primary)

    # Cleanup killed pending nodes
    for new_node in new_nodes:
        network.retire_node(primary, new_node)
    wait_for_reconfiguration_to_complete(network)

    return network
def test_add_as_many_pending_nodes(network, args):
    # Should not change the raft consensus rules (i.e. majority)
    primary, _ = network.find_primary()
    number_new_nodes = len(network.nodes)
    LOG.info(
        f"Adding {number_new_nodes} pending nodes - consensus rules should not change"
    )

    new_nodes = []
    for _ in range(number_new_nodes):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_nodes.append(new_node)

    check_can_progress(primary)

    for new_node in new_nodes:
        network.retire_node(primary, new_node)
    wait_for_reconfiguration_to_complete(network)

    # Stop the retired nodes so they don't linger in the background and interfere
    # with subsequent tests
    for new_node in new_nodes:
        new_node.stop()

    return network
def test_node_replacement(network, args):
    primary, backups = network.find_nodes()

    node_to_replace = backups[-1]
    LOG.info(f"Retiring node {node_to_replace.local_node_id}")
    network.retire_node(primary, node_to_replace)
    node_to_replace.stop()
    check_can_progress(primary)

    LOG.info("Adding one node on same address as retired node")
    replacement_node = network.create_node(
        f"local://{node_to_replace.rpc_host}:{node_to_replace.rpc_port}",
        node_port=node_to_replace.node_port,
    )
    network.join_node(replacement_node, args.package, args, from_snapshot=False)
    network.trust_node(replacement_node, args)

    assert replacement_node.node_id != node_to_replace.node_id
    assert replacement_node.rpc_host == node_to_replace.rpc_host
    assert replacement_node.node_port == node_to_replace.node_port
    assert replacement_node.rpc_port == node_to_replace.rpc_port

    allowed_to_suspend_count = network.get_f() - len(network.get_stopped_nodes())
    backups_to_suspend = backups[:allowed_to_suspend_count]
    LOG.info(
        f"Suspending {len(backups_to_suspend)} other nodes to make progress depend on the replacement"
    )
    for other_backup in backups_to_suspend:
        other_backup.suspend()
    # Confirm the network can make progress
    check_can_progress(primary)
    for other_backup in backups_to_suspend:
        other_backup.resume()

    return network
def run_tls_san_checks(args):
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        args.common_read_only_ledger_dir = None  # Reset from previous test
        network.start_and_join(args)

        LOG.info("Check SAN value in TLS certificate")
        dummy_san = "*.dummy.com"
        new_node = network.create_node("local://localhost")
        args.san = [f"dNSName:{dummy_san}"]
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == dummy_san

        LOG.info("A node started with no specified SAN defaults to public RPC host")
        dummy_public_rpc_host = "123.123.123.123"
        args.san = None
        new_node = network.create_node(f"local://localhost:0,{dummy_public_rpc_host}")
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(
            new_node.get_tls_certificate_pem(use_public_rpc_host=False)
        )
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == ipaddress.ip_address(dummy_public_rpc_host)
def run_join_old_snapshot(args):
    txs = app.LoggingTxs("user0")
    nodes = ["local://localhost"]

    with tempfile.TemporaryDirectory() as tmp_dir:
        with infra.network.network(
            nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            network.start_and_open(args)
            primary, _ = network.find_primary()

            # First, retrieve and save one committed snapshot
            txs.issue(network, number_txs=args.snapshot_tx_interval)
            old_committed_snapshots = network.get_committed_snapshots(primary)
            copy(
                os.path.join(
                    old_committed_snapshots, os.listdir(old_committed_snapshots)[0]
                ),
                tmp_dir,
            )

            # Then generate another newer snapshot, and add two more nodes from it
            txs.issue(network, number_txs=args.snapshot_tx_interval)

            for _ in range(0, 2):
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                )
                network.trust_node(new_node, args)

            # Kill primary and wait for a new one: new primary is
            # guaranteed to have started from the new snapshot
            primary.stop()
            network.wait_for_new_primary(primary)

            # Start new node from the old snapshot
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                    snapshots_dir=tmp_dir,
                    timeout=3,
                )
            except infra.network.StartupSnapshotIsOld:
                pass
def test_no_quote(network, args):
    untrusted_node = network.create_node("local://localhost")
    network.join_node(untrusted_node, args.package, args)
    with untrusted_node.client(
        ca=os.path.join(
            untrusted_node.common_dir, f"{untrusted_node.local_node_id}.pem"
        )
    ) as uc:
        r = uc.get("/node/quotes/self")
        assert r.status_code == http.HTTPStatus.NOT_FOUND
    return network
def run_tls_san_checks(args):
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        args.common_read_only_ledger_dir = None  # Reset from previous test
        network.start_and_open(args)
        network.verify_service_certificate_validity_period(
            args.initial_service_cert_validity_days
        )

        LOG.info("Check SAN value in TLS certificate")
        dummy_san = "*.dummy.com"
        new_node = network.create_node(
            infra.interfaces.HostSpec(
                rpc_interfaces={
                    infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                        endorsement=infra.interfaces.Endorsement(
                            authority=infra.interfaces.EndorsementAuthority.Node
                        )
                    )
                }
            )
        )
        args.subject_alt_names = [f"dNSName:{dummy_san}"]
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == dummy_san

        LOG.info("A node started with no specified SAN defaults to public RPC host")
        dummy_public_rpc_host = "123.123.123.123"
        args.subject_alt_names = []
        new_node = network.create_node(
            infra.interfaces.HostSpec(
                rpc_interfaces={
                    infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                        public_host=dummy_public_rpc_host,
                        endorsement=infra.interfaces.Endorsement(
                            authority=infra.interfaces.EndorsementAuthority.Node
                        ),
                    )
                }
            )
        )
        network.join_node(new_node, args.package, args)
        # Cannot trust the node here as client cannot authenticate dummy public IP in cert
        with open(
            os.path.join(network.common_dir, f"{new_node.local_node_id}.pem"),
            encoding="utf-8",
        ) as self_signed_cert:
            sans = infra.crypto.get_san_from_pem_cert(self_signed_cert.read())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == ipaddress.ip_address(dummy_public_rpc_host)
def test_add_node_with_read_only_ledger(network, args):
    network.txs.issue(network, number_txs=10)
    network.txs.issue(network, number_txs=2, repeat=True)

    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node, args.package, args, from_snapshot=False, copy_ledger_read_only=True
    )
    network.trust_node(new_node, args)
    return network
def test_new_joiner_helps_liveness(network, args):
    primary, backups = network.find_nodes()

    # Issue some transactions, so there is a ledger history that a new node must receive
    network.txs.issue(network, number_txs=10)

    # Remove a node, leaving the network frail
    network.retire_node(primary, backups[-1])
    backups[-1].stop()

    primary, backups = network.find_nodes()

    with contextlib.ExitStack() as stack:
        # Add a new node, but partition them before trusting them
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_joiner_partition = [new_node]
        new_joiner_rules = stack.enter_context(
            network.partitioner.partition([primary, *backups], new_joiner_partition)
        )

        # Trust the new node, and wait for commit of this (but don't ask the new node
        # itself, which doesn't know this yet)
        network.trust_node(new_node, args, no_wait=True)
        check_can_progress(primary)

        # Partition the primary, temporarily creating a minority service that cannot make progress
        minority_partition = backups[len(backups) // 2 :] + new_joiner_partition
        minority_rules = stack.enter_context(
            network.partitioner.partition(minority_partition)
        )
        # This is an unusual situation, where we've actually produced a dead partitioned node.
        # Initially any write requests will timeout (failed attempt at forwarding), and then
        # the node transitions to a candidate with nobody to talk to. Rather than trying to
        # catch the errors of these states quickly, we just sleep until the latter state is
        # reached, and then confirm it was reached.
        time.sleep(network.observed_election_duration)
        with backups[0].client("user0") as c:
            r = c.post("/app/log/private", {"id": 42, "msg": "Hello world"})
            assert r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE

        # Restore the new node to the service
        new_joiner_rules.drop()

        # Confirm that the new node catches up, and progress can be made in this majority partition
        network.wait_for_new_primary(primary, minority_partition)
        check_can_progress(new_node)

        # Explicitly drop rules before continuing
        minority_rules.drop()

        network.wait_for_primary_unanimity()
        primary, _ = network.find_nodes()
        network.wait_for_all_nodes_to_commit(primary=primary)
def test_learner_does_not_take_part(network, args):
    primary, backups = network.find_nodes()
    f_backups = backups[: network.get_f() + 1]

    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)

    with network.partitioner.partition(f_backups):
        check_does_not_progress(primary, timeout=5)

        try:
            network.consortium.trust_node(
                primary,
                new_node.node_id,
                timeout=ceil(args.join_timer * 2 / 1000),
                valid_from=str(infra.crypto.datetime_to_X509time(datetime.now())),
            )
            new_node.wait_for_node_to_join(timeout=ceil(args.join_timer * 2 / 1000))
            join_failed = False
        except Exception:
            join_failed = True

        if not join_failed:
            raise Exception("join succeeded unexpectedly")

        with new_node.client(self_signed_ok=True) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

        # New node joins, but cannot be promoted to TRUSTED without f other backups
        check_does_not_progress(primary, timeout=5)

        with new_node.client(self_signed_ok=True) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

    network.wait_for_primary_unanimity()
    primary, _ = network.find_nodes()
    network.wait_for_all_nodes_to_commit(primary=primary)
    check_can_progress(primary)
def test_no_quote(network, args):
    untrusted_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node
                    )
                )
            }
        )
    )
    network.join_node(untrusted_node, args.package, args)
    with untrusted_node.client(
        ca=os.path.join(
            untrusted_node.common_dir, f"{untrusted_node.local_node_id}.pem"
        )
    ) as uc:
        r = uc.get("/node/quotes/self")
        assert r.status_code == http.HTTPStatus.NOT_FOUND
    return network
def test_add_node(network, args, from_snapshot=True):
    # Note: host is supplied explicitly to avoid having differently
    # assigned IPs for the interfaces, something which the test infra doesn't
    # support widely yet.
    operator_rpc_interface = "operator_rpc_interface"
    host = infra.net.expand_localhost()
    new_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                    host=host
                ),
                operator_rpc_interface: infra.interfaces.RPCInterface(
                    host=host,
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node
                    ),
                ),
            }
        )
    )
    network.join_node(new_node, args.package, args, from_snapshot=from_snapshot)

    # Verify self-signed node certificate validity period
    new_node.verify_certificate_validity_period(interface_name=operator_rpc_interface)

    network.trust_node(
        new_node,
        args,
        validity_period_days=args.maximum_node_certificate_validity_days // 2,
    )

    if not from_snapshot:
        with new_node.client() as c:
            s = c.get("/node/state")
            assert s.body.json()["node_id"] == new_node.node_id
            assert (
                s.body.json()["startup_seqno"] == 0
            ), "Node started without snapshot but reports startup seqno != 0"

    # Now that the node is trusted, verify endorsed certificate validity period
    new_node.verify_certificate_validity_period()

    return network
def test_join_straddling_primary_replacement(network, args):
    # We need a fourth node before we attempt the replacement, otherwise
    # we will reach a situation where two out of four nodes in the voting quorum
    # are unable to participate (one retired and one not yet joined).
    test_add_node(network, args)

    primary, _ = network.find_primary()
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args)
    proposal_body = {
        "actions": [
            {
                "name": "transition_node_to_trusted",
                "args": {
                    "node_id": new_node.node_id,
                    "valid_from": str(datetime.now()),
                },
            },
            {
                "name": "remove_node",
                "args": {"node_id": primary.node_id},
            },
        ]
    }

    proposal = network.consortium.get_any_active_member().propose(
        primary, proposal_body
    )
    network.consortium.vote_using_majority(
        primary,
        proposal,
        {"ballot": "export function vote (proposal, proposer_id) { return true }"},
        timeout=10,
    )

    network.wait_for_new_primary(primary)
    new_node.wait_for_node_to_join(timeout=10)

    primary.stop()
    network.nodes.remove(primary)
    wait_for_reconfiguration_to_complete(network)
    return network
def test_add_node_invalid_validity_period(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args)
    try:
        network.trust_node(
            new_node,
            args,
            validity_period_days=args.maximum_node_certificate_validity_days + 1,
        )
    except infra.proposal.ProposalNotAccepted:
        LOG.info(
            "As expected, node could not be trusted since its certificate validity period is invalid"
        )
    else:
        raise Exception(
            "Node should not be trusted if its certificate validity period is invalid"
        )
    return network
def test_add_node_from_snapshot(
    network, args, copy_ledger_read_only=True, from_backup=False
):
    # Before adding the node from a snapshot, override at least one app entry
    # and wait for a new committed snapshot covering that entry, so that there
    # is at least one historical entry to verify.
    network.txs.issue(network, number_txs=1)
    for _ in range(1, args.snapshot_tx_interval):
        network.txs.issue(network, number_txs=1, repeat=True)
        last_tx = network.txs.get_last_tx(priv=True)
        if network.wait_for_snapshot_committed_for(seqno=last_tx[1]["seqno"]):
            break

    target_node = None
    snapshots_dir = None
    if from_backup:
        primary, target_node = network.find_primary_and_any_backup()
        # Retrieve snapshot from primary as only primary node
        # generates snapshots
        snapshots_dir = network.get_committed_snapshots(primary)

    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node,
        args.package,
        args,
        copy_ledger_read_only=copy_ledger_read_only,
        target_node=target_node,
        snapshots_dir=snapshots_dir,
        from_snapshot=True,
    )
    network.trust_node(new_node, args)

    with new_node.client() as c:
        r = c.get("/node/state")
        assert (
            r.body.json()["startup_seqno"] != 0
        ), "Node started from snapshot but reports startup seqno of 0"

    # Finally, verify all app entries on the new node, including historical ones
    # from the historical ledger
    network.txs.verify(node=new_node, include_historical=copy_ledger_read_only)

    return network
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(new_node, args)
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args)

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)
def test_learner_catches_up(network, args):
    primary, _ = network.find_primary()

    num_nodes_before = 0
    with primary.client() as c:
        s = c.get("/node/consensus")
        rj = s.body.json()
        # At this point, there should be exactly one configuration
        assert len(rj["details"]["configs"]) == 1
        c0 = rj["details"]["configs"][0]["nodes"]
        num_nodes_before = len(c0)

    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)
    network.trust_node(new_node, args)

    with new_node.client() as c:
        s = c.get("/node/network/nodes/self")
        rj = s.body.json()
        assert rj["status"] == "Learner" or rj["status"] == "Trusted"

    network.wait_for_node_in_store(
        primary,
        new_node.node_id,
        node_status=(ccf.ledger.NodeStatus.TRUSTED),
        timeout=3,
    )

    with primary.client() as c:
        s = c.get("/node/consensus")
        rj = s.body.json()
        assert len(rj["details"]["learners"]) == 0

        # At this point, there should be exactly one configuration, which includes the new node.
        assert len(rj["details"]["configs"]) == 1
        c0 = rj["details"]["configs"][0]["nodes"]
        assert len(c0) == num_nodes_before + 1
        assert new_node.node_id in c0

    return network
def test_migration_2tx_reconfiguration(
    network, args, initial_is_1tx=True, valid_from=None, **kwargs
):
    primary, _ = network.find_primary()

    # Check that the service config agrees that this is a 1tx network
    with primary.client() as c:
        s = c.get("/node/service/configuration").body.json()
        if initial_is_1tx:
            assert s["reconfiguration_type"] == "OneTransaction"

    network.consortium.submit_2tx_migration_proposal(primary)
    network.wait_for_all_nodes_to_commit(primary)

    # Check that the service config has been updated
    with primary.client() as c:
        rj = c.get("/node/service/configuration").body.json()
        assert rj["reconfiguration_type"] == "TwoTransaction"

    # Check that all nodes have updated their consensus parameters
    for node in network.nodes:
        with node.client() as c:
            rj = c.get("/node/consensus").body.json()
            assert "reconfiguration_type" in rj["details"]
            assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
            assert len(rj["details"]["learners"]) == 0

    new_node = network.create_node("local://localhost", **kwargs)
    network.join_node(new_node, args.package, args)
    network.trust_node(new_node, args, valid_from=valid_from)

    # Check that the new node has the right consensus parameter
    with new_node.client() as c:
        rj = c.get("/node/consensus").body.json()
        assert "reconfiguration_type" in rj["details"]
        assert "learners" in rj["details"]
        assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
        assert len(rj["details"]["learners"]) == 0
def test_add_node(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)

    # Verify self-signed node certificate validity period
    new_node.verify_certificate_validity_period()

    network.trust_node(
        new_node,
        args,
        validity_period_days=args.max_allowed_node_cert_validity_days // 2,
    )
    with new_node.client() as c:
        s = c.get("/node/state")
        assert s.body.json()["node_id"] == new_node.node_id
        assert (
            s.body.json()["startup_seqno"] == 0
        ), "Node started without snapshot but reports startup seqno != 0"

    # Now that the node is trusted, verify endorsed certificate validity period
    new_node.verify_certificate_validity_period()

    return network
def test_learner_does_not_take_part(network, args):
    primary, backups = network.find_nodes()
    f_backups = backups[: network.get_f() + 1]

    # Note: host is supplied explicitly to avoid having differently
    # assigned IPs for the interfaces, something which the test infra doesn't
    # support widely yet.
    operator_rpc_interface = "operator_rpc_interface"
    host = infra.net.expand_localhost()
    new_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                    host=host
                ),
                operator_rpc_interface: infra.interfaces.RPCInterface(
                    host=host,
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node
                    ),
                ),
            }
        )
    )
    network.join_node(new_node, args.package, args, from_snapshot=False)

    LOG.info("Wait for all nodes to have committed join of new pending node")
    network.wait_for_all_nodes_to_commit(primary=primary)

    # Here, we partition a majority of backups. This is very intentional so that
    # the new learner node is not promoted to trusted while the partition is up.
    # However, this means that the isolated majority of backups can (and will)
    # elect one of them as new primary while the partition is up. When the partition
    # is lifted, all the transactions executed on the primary node (including
    # trusting the new node) will be rolled back. Because of this, we issue a new
    # trust_node proposal to make sure the new node ends up being trusted and joins
    # successfully.
    with network.partitioner.partition(f_backups):
        check_does_not_progress(primary, timeout=5)

        try:
            network.consortium.trust_node(
                primary,
                new_node.node_id,
                timeout=ceil(args.join_timer_s * 2),
                valid_from=datetime.now(),
            )
        except TimeoutError:
            LOG.info("Trust node proposal did not commit as expected")
        else:
            raise Exception("Trust node proposal committed unexpectedly")

        check_does_not_progress(primary, timeout=5)

        LOG.info("Majority partition can make progress")
        partition_primary, _ = network.wait_for_new_primary(primary, nodes=f_backups)
        check_can_progress(partition_primary)

        LOG.info("New joiner is not promoted to Trusted without f other backups")
        with new_node.client(
            interface_name=operator_rpc_interface, verify_ca=False
        ) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

    LOG.info("Partition is lifted, wait for primary unanimity on original nodes")
    # Note: Because trusting the new node failed, the new node is not considered
    # in the primary unanimity. Indeed, its transition to Trusted may have been rolled back.
    primary = network.wait_for_primary_unanimity()
    network.wait_for_all_nodes_to_commit(primary=primary)

    LOG.info("Trust new joiner again")
    network.trust_node(new_node, args)

    check_can_progress(primary)
    check_can_progress(new_node)
def test_update_all_nodes(network, args):
    replacement_package = get_replacement_package(args)

    primary, _ = network.find_nodes()

    first_code_id = infra.utils.get_code_id(
        args.enclave_type, args.oe_binary, args.package
    )
    new_code_id = infra.utils.get_code_id(
        args.enclave_type, args.oe_binary, replacement_package
    )

    if args.enclave_type == "virtual":
        # Pretend this was already present
        network.consortium.add_new_code(primary, first_code_id)

    LOG.info("Add new code id")
    network.consortium.add_new_code(primary, new_code_id)

    with primary.client() as uc:
        r = uc.get("/node/code")
        versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
        expected = sorted(
            [
                {"digest": first_code_id, "status": "AllowedToJoin"},
                {"digest": new_code_id, "status": "AllowedToJoin"},
            ],
            key=lambda x: x["digest"],
        )
        assert versions == expected, versions

    LOG.info("Remove old code id")
    network.consortium.retire_code(primary, first_code_id)

    with primary.client() as uc:
        r = uc.get("/node/code")
        versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
        expected = sorted(
            [
                {"digest": new_code_id, "status": "AllowedToJoin"},
            ],
            key=lambda x: x["digest"],
        )
        assert versions == expected, versions

    old_nodes = network.nodes.copy()

    LOG.info("Start fresh nodes running new code")
    for _ in range(0, len(old_nodes)):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, replacement_package, args)
        network.trust_node(new_node, args)

    LOG.info("Retire original nodes running old code")
    for node in old_nodes:
        primary, _ = network.find_nodes()
        network.retire_node(primary, node)
        # Elections take (much) longer than a backup removal which is just
        # a commit, so we need to adjust our timeout accordingly, hence this branch
        if node.node_id == primary.node_id:
            new_primary, _ = network.wait_for_new_primary(primary)
            primary = new_primary
        node.stop()

    LOG.info("Check the network is still functional")
    check_can_progress(new_node)
    return network
def run_code_upgrade_from(
    args,
    from_install_path,
    to_install_path,
    from_version=None,
    to_version=None,
    from_container_image=None,
):
    from_binary_dir, from_library_dir = get_bin_and_lib_dirs_for_install_path(
        from_install_path
    )
    to_binary_dir, to_library_dir = get_bin_and_lib_dirs_for_install_path(
        to_install_path
    )

    set_js_args(args, from_install_path, to_install_path)

    jwt_issuer = infra.jwt_issuer.JwtIssuer(
        "https://localhost", refresh_interval=args.jwt_key_refresh_interval_s
    )
    with jwt_issuer.start_openid_server():
        txs = app.LoggingTxs(jwt_issuer=jwt_issuer)
        with infra.network.network(
            args.nodes,
            binary_directory=from_binary_dir,
            library_directory=from_library_dir,
            pdb=args.pdb,
            txs=txs,
            jwt_issuer=jwt_issuer,
            version=from_version,
        ) as network:
            network.start_and_open(args, node_container_image=from_container_image)

            old_nodes = network.get_joined_nodes()
            primary, _ = network.find_primary()

            LOG.info("Apply transactions to old service")
            issue_activity_on_live_service(network, args)

            new_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=to_library_dir,
            )
            network.consortium.add_new_code(primary, new_code_id)

            # Note: alternate between joining from snapshot and replaying entire ledger
            new_nodes = []
            from_snapshot = True
            for _ in range(0, len(old_nodes)):
                new_node = network.create_node(
                    "local://localhost",
                    binary_dir=to_binary_dir,
                    library_dir=to_library_dir,
                    version=to_version,
                )
                network.join_node(
                    new_node, args.package, args, from_snapshot=from_snapshot
                )
                network.trust_node(
                    new_node,
                    args,
                    valid_from=str(  # Pre-2.0 nodes require X509 time format
                        infra.crypto.datetime_to_X509time(datetime.datetime.now())
                    ),
                )
                # For 2.x nodes joining a 1.x service before the constitution is updated,
                # the node certificate validity period is set by the joining node itself
                # as [node startup time, node startup time + 365 days]
                new_node.verify_certificate_validity_period(
                    expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS,
                    ignore_proposal_valid_from=True,
                )
                from_snapshot = not from_snapshot
                new_nodes.append(new_node)

            # Verify that all nodes run the expected CCF version
            for node in network.get_joined_nodes():
                # Note: /node/version endpoint was added in 2.x
                if not node.major_version or node.major_version > 1:
                    with node.client() as c:
                        r = c.get("/node/version")
                        expected_version = node.version or args.ccf_version
                        version = r.body.json()["ccf_version"]
                        assert (
                            version == expected_version
                        ), f"For node {node.local_node_id}, expect version {expected_version}, got {version}"

            LOG.info("Apply transactions to hybrid network, with primary as old node")
            issue_activity_on_live_service(network, args)

            old_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=from_library_dir,
            )
            primary, _ = network.find_primary()
            network.consortium.retire_code(primary, old_code_id)

            for index, node in enumerate(old_nodes):
                network.retire_node(primary, node)
                if primary == node:
                    primary, _ = network.wait_for_new_primary(primary)

                    # This block is here to test the transition period from a network that
                    # does not support custom claims to one that does. It can be removed after
                    # the transition is complete.
                    #
                    # The new build, being unreleased, doesn't have a version at all
                    if not primary.major_version:
                        LOG.info("Upgrade to new JS app")
                        # Upgrade to a version of the app containing an endpoint that
                        # registers custom claims
                        network.consortium.set_js_app_from_dir(
                            primary, args.new_js_app_bundle
                        )
                        LOG.info("Run transaction with additional claim")
                        # With wait_for_sync, the client checks that all nodes, including
                        # the minority of old ones, have acked the transaction
                        msg_idx = network.txs.idx + 1
                        txid = network.txs.issue(
                            network, number_txs=1, record_claim=True, wait_for_sync=True
                        )
                        assert len(network.txs.pub[msg_idx]) == 1
                        claims = network.txs.pub[msg_idx][-1]["msg"]

                        LOG.info(
                            "Check receipts are fine, including transaction with claims"
                        )
                        test_random_receipts(
                            network,
                            args,
                            lts=True,
                            additional_seqnos={txid.seqno: claims.encode()},
                        )
                        # Also check receipts on an old node
                        if index + 1 < len(old_nodes):
                            next_node = old_nodes[index + 1]
                            test_random_receipts(
                                network,
                                args,
                                lts=True,
                                additional_seqnos={txid.seqno: None},
                                node=next_node,
                            )
                node.stop()

            LOG.info("Service is now made of new nodes only")

            # Rollover JWKS so that new primary must read historical CA bundle table
            # and retrieve new keys via auto refresh
            if not os.getenv("CONTAINER_NODES"):
                jwt_issuer.refresh_keys()
                # Note: /gov/jwt_keys/all endpoint was added in 2.x
                primary, _ = network.find_nodes()
                if not primary.major_version or primary.major_version > 1:
                    jwt_issuer.wait_for_refresh(network)
                else:
                    time.sleep(3)
            else:
                # https://github.com/microsoft/CCF/issues/2608#issuecomment-924785744
                LOG.warning("Skipping JWT refresh as running nodes in container")

            # Code update from 1.x to 2.x requires cycling the freshly-added 2.x nodes
            # once. This is because 2.x nodes will not have an endorsed certificate
            # recorded in the store and thus will not be able to have their certificate
            # refreshed, etc.
            test_new_service(
                network,
                args,
                to_install_path,
                to_binary_dir,
                to_library_dir,
                to_version,
                cycle_existing_nodes=True,
            )

            # Check that the ledger can be parsed
            network.get_latest_ledger_public_state()
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    all_nodes = network.get_joined_nodes()

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    # Pre-2.0 nodes require X509 time format
    valid_from = str(infra.crypto.datetime_to_X509time(datetime.datetime.now()))

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(
            new_node,
            args,
            valid_from=valid_from,
        )
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )
        all_nodes.append(new_node)

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args, valid_from=valid_from)
    test_service_cert_renewal(network, args, valid_from=valid_from)

    LOG.info("Waiting for retired nodes to be automatically removed")
    for node in all_nodes:
        network.wait_for_node_in_store(
            primary,
            node.node_id,
            node_status=ccf.ledger.NodeStatus.TRUSTED if node.is_joined() else None,
        )

    if args.check_2tx_reconfig_migration:
        test_migration_2tx_reconfiguration(
            network,
            args,
            initial_is_1tx=False,  # Reconfiguration type added in 2.x
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
            valid_from=valid_from,
        )

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)
def run_code_upgrade_from(
    args,
    from_install_path,
    to_install_path,
    from_version=None,
    to_version=None,
):
    from_binary_dir, from_library_dir = get_bin_and_lib_dirs_for_install_path(
        from_install_path
    )
    to_binary_dir, to_library_dir = get_bin_and_lib_dirs_for_install_path(
        to_install_path
    )

    set_js_args(args, from_install_path)

    jwt_issuer = infra.jwt_issuer.JwtIssuer(
        "https://localhost", refresh_interval=args.jwt_key_refresh_interval_s
    )
    with jwt_issuer.start_openid_server():
        txs = app.LoggingTxs(jwt_issuer=jwt_issuer)
        with infra.network.network(
            args.nodes,
            binary_directory=from_binary_dir,
            library_directory=from_library_dir,
            pdb=args.pdb,
            txs=txs,
            jwt_issuer=jwt_issuer,
            version=from_version,
        ) as network:
            network.start_and_join(args)

            old_nodes = network.get_joined_nodes()
            primary, _ = network.find_primary()

            LOG.info("Apply transactions to old service")
            issue_activity_on_live_service(network, args)

            new_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=to_library_dir,
            )
            network.consortium.add_new_code(primary, new_code_id)

            # Note: alternate between joining from snapshot and replaying entire ledger
            new_nodes = []
            from_snapshot = True
            for _ in range(0, len(old_nodes)):
                new_node = network.create_node(
                    "local://localhost",
                    binary_dir=to_binary_dir,
                    library_dir=to_library_dir,
                    version=to_version,
                )
                network.join_node(
                    new_node, args.package, args, from_snapshot=from_snapshot
                )
                network.trust_node(new_node, args)
                # For 2.x nodes joining a 1.x service before the constitution is updated,
                # the node certificate validity period is set by the joining node itself
                # as [node startup time, node startup time + 365 days]
                new_node.verify_certificate_validity_period(
                    expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS,
                    ignore_proposal_valid_from=True,
                )
                from_snapshot = not from_snapshot
                new_nodes.append(new_node)

            # Verify that all nodes run the expected CCF version
            for node in network.get_joined_nodes():
                # Note: /node/version endpoint was added in 2.x
                if not node.major_version or node.major_version > 1:
                    with node.client() as c:
                        r = c.get("/node/version")
                        expected_version = node.version or args.ccf_version
                        version = r.body.json()["ccf_version"]
                        assert (
                            version == expected_version
                        ), f"For node {node.local_node_id}, expect version {expected_version}, got {version}"

            LOG.info("Apply transactions to hybrid network, with primary as old node")
            issue_activity_on_live_service(network, args)

            old_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=from_library_dir,
            )
            primary, _ = network.find_primary()
            network.consortium.retire_code(primary, old_code_id)

            for node in old_nodes:
                network.retire_node(primary, node)
                if primary == node:
                    primary, _ = network.wait_for_new_primary(primary)
                node.stop()

            LOG.info("Service is now made of new nodes only")

            # Rollover JWKS so that new primary must read historical CA bundle table
            # and retrieve new keys via auto refresh
            jwt_issuer.refresh_keys()
            # Note: /gov/jwt_keys/all endpoint was added in 2.x
            primary, _ = network.find_nodes()
            if not primary.major_version or primary.major_version > 1:
                jwt_issuer.wait_for_refresh(network)
            else:
                time.sleep(3)

            # Code update from 1.x to 2.x requires cycling the freshly-added 2.x nodes
            # once. This is because 2.x nodes will not have an endorsed certificate
            # recorded in the store and thus will not be able to have their certificate
            # refreshed, etc.
            test_new_service(
                network,
                args,
                to_install_path,
                to_binary_dir,
                to_library_dir,
                to_version,
                cycle_existing_nodes=True,
            )

            # Check that the ledger can be parsed
            network.get_latest_ledger_public_state()