Example #1
def test_add_node_from_backup(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node, args.package, args, target_node=network.find_any_backup()
    )
    network.trust_node(new_node, args)
    return network
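
These fragments assume the CCF end-to-end test harness (infra.network, infra.interfaces, LOG, etc.) is already imported at module scope. As a rough sketch of how such a fragment is typically driven — the exact runner wiring is an assumption, reusing only the harness entry points visible in Examples #8, #9 and #11 — a test function receives an already-opened network plus the parsed CLI args:

def run(args):
    # Assumed wiring: open a network, then thread it through the test
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_open(args)
        network = test_add_node_from_backup(network, args)
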
Example #2
def test_node_data(network, args):
    with tempfile.NamedTemporaryFile(mode="w+") as ntf:
        primary, _ = network.find_primary()
        with primary.client() as c:

            def get_nodes():
                r = c.get("/node/network/nodes")
                assert r.status_code == 200, (r.status_code, r.body.text())
                return {
                    node_info["node_id"]: node_info
                    for node_info in r.body.json()["nodes"]
                }

            new_node_data = {"my_id": "0xdeadbeef", "location": "The Moon"}
            json.dump(new_node_data, ntf)
            ntf.flush()
            untrusted_node = network.create_node(
                infra.interfaces.HostSpec(
                    rpc_interfaces={
                        infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                            endorsement=infra.interfaces.Endorsement(
                                authority=infra.interfaces.EndorsementAuthority.Node
                            )
                        )
                    }
                ),
                node_data_json_file=ntf.name,
            )

            # NB: This new node joins but is never trusted
            network.join_node(untrusted_node, args.package, args)

            nodes = get_nodes()
            assert untrusted_node.node_id in nodes, nodes
            new_node_info = nodes[untrusted_node.node_id]
            assert new_node_info["node_data"] == new_node_data, new_node_info

            # Set modified node data
            new_node_data["previous_locations"] = [new_node_data["location"]]
            new_node_data["location"] = "Secret Base"

            network.consortium.set_node_data(primary, untrusted_node.node_id,
                                             new_node_data)

            nodes = get_nodes()
            assert untrusted_node.node_id in nodes, nodes
            new_node_info = nodes[untrusted_node.node_id]
            assert new_node_info["node_data"] == new_node_data, new_node_info

            # Set modified node data on trusted primary
            primary_node_data = "Some plain JSON string"
            network.consortium.set_node_data(primary, primary.node_id,
                                             primary_node_data)

            nodes = get_nodes()
            assert primary.node_id in nodes, nodes
            primary_node_info = nodes[primary.node_id]
            assert (primary_node_info["node_data"] == primary_node_data
                    ), primary_node_info

    return network
Example #3
def test_add_node_with_bad_code(network, args):
    if args.enclave_type == "virtual":
        LOG.warning(
            "Skipping test_add_node_with_bad_code with virtual enclave")
        return network

    replacement_package = (
        "samples/apps/logging/liblogging"
        if args.package == "libjs_generic"
        else "libjs_generic"
    )

    new_code_id = infra.utils.get_code_id(args.enclave_type, args.oe_binary,
                                          replacement_package)

    LOG.info(f"Adding a node with unsupported code id {new_code_id}")
    code_not_found_exception = None
    try:
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, replacement_package, args, timeout=3)
    except infra.network.CodeIdNotFound as err:
        code_not_found_exception = err

    assert (
        code_not_found_exception is not None
    ), f"Adding a node with unsupported code id {new_code_id} should fail"

    return network
Example #4
def test_node_filter(network, args):
    primary, _ = network.find_primary_and_any_backup()
    with primary.client() as c:

        def get_nodes(status):
            r = c.get(f"/node/network/nodes?status={status}")
            nodes = r.body.json()["nodes"]
            return sorted(nodes, key=lambda node: node["node_id"])

        trusted_before = get_nodes("Trusted")
        pending_before = get_nodes("Pending")
        retired_before = get_nodes("Retired")
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, target_node=primary)
        trusted_after = get_nodes("Trusted")
        pending_after = get_nodes("Pending")
        retired_after = get_nodes("Retired")
        assert trusted_before == trusted_after, (trusted_before, trusted_after)
        assert len(pending_before) + 1 == len(pending_after), (
            pending_before,
            pending_after,
        )
        assert retired_before == retired_after, (retired_before, retired_after)

        assert all(info["status"] == "Trusted" for info in trusted_after), trusted_after
        assert all(info["status"] == "Pending" for info in pending_after), pending_after
        assert all(info["status"] == "Retired" for info in retired_after), retired_after
    return network
Example #5
def test_add_as_many_pending_nodes(network, args):
    # Killing pending nodes should not change the Raft consensus rules
    primary, _ = network.find_primary()
    number_new_nodes = len(network.nodes)
    LOG.info(
        f"Adding {number_new_nodes} pending nodes - consensus rules should not change"
    )

    new_nodes = []
    for _ in range(number_new_nodes):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_nodes.append(new_node)

    for new_node in new_nodes:
        new_node.stop()

    # Even though the pending nodes (half of all nodes) are stopped,
    # the service can still make progress
    check_can_progress(primary)

    # Cleanup killed pending nodes
    for new_node in new_nodes:
        network.retire_node(primary, new_node)

    wait_for_reconfiguration_to_complete(network)

    return network
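
The comment above rests on pending nodes not yet being part of the Raft configuration, so only trusted nodes count towards the majority. A minimal sketch of that arithmetic (illustrative only, not part of the test infra):

def can_make_progress(trusted_count, live_trusted_count):
    # Raft needs a strict majority of the *configured* (trusted) nodes;
    # pending nodes do not appear in the configuration at all
    majority = trusted_count // 2 + 1
    return live_trusted_count >= majority

assert can_make_progress(3, 3)  # healthy network of 3 trusted nodes
assert can_make_progress(3, 2)  # one trusted node down: still a majority
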
Example #6
def test_add_as_many_pending_nodes(network, args):
    # Should not change the Raft consensus rules (i.e. majority)
    primary, _ = network.find_primary()
    number_new_nodes = len(network.nodes)
    LOG.info(
        f"Adding {number_new_nodes} pending nodes - consensus rules should not change"
    )

    new_nodes = []
    for _ in range(number_new_nodes):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_nodes.append(new_node)

    check_can_progress(primary)

    for new_node in new_nodes:
        network.retire_node(primary, new_node)

    wait_for_reconfiguration_to_complete(network)

    # Stop the retired nodes so they don't linger in the background and interfere
    # with subsequent tests
    for new_node in new_nodes:
        new_node.stop()

    return network
Example #7
def test_node_replacement(network, args):
    primary, backups = network.find_nodes()

    node_to_replace = backups[-1]
    LOG.info(f"Retiring node {node_to_replace.local_node_id}")
    network.retire_node(primary, node_to_replace)
    node_to_replace.stop()
    check_can_progress(primary)

    LOG.info("Adding one node on same address as retired node")
    replacement_node = network.create_node(
        f"local://{node_to_replace.rpc_host}:{node_to_replace.rpc_port}",
        node_port=node_to_replace.node_port,
    )
    network.join_node(replacement_node, args.package, args, from_snapshot=False)
    network.trust_node(replacement_node, args)

    assert replacement_node.node_id != node_to_replace.node_id
    assert replacement_node.rpc_host == node_to_replace.rpc_host
    assert replacement_node.node_port == node_to_replace.node_port
    assert replacement_node.rpc_port == node_to_replace.rpc_port

    allowed_to_suspend_count = network.get_f() - len(network.get_stopped_nodes())
    backups_to_suspend = backups[:allowed_to_suspend_count]
    LOG.info(
        f"Suspending {len(backups_to_suspend)} other nodes to make progress depend on the replacement"
    )
    for other_backup in backups_to_suspend:
        other_backup.suspend()
    # Confirm the network can make progress
    check_can_progress(primary)
    for other_backup in backups_to_suspend:
        other_backup.resume()

    return network
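
The suspension count above follows the usual crash-fault-tolerance bound: with n = 2f + 1 nodes, any f may be down while a majority of f + 1 remains. A sketch of that bound (assuming network.get_f() follows the same formula, which these examples do not show):

def max_faulty(n_nodes):
    # Largest f such that n_nodes - f is still a strict majority
    return (n_nodes - 1) // 2

assert max_faulty(3) == 1
assert max_faulty(5) == 2
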
Example #8
def run_tls_san_checks(args):
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        args.common_read_only_ledger_dir = None  # Reset from previous test
        network.start_and_join(args)

        LOG.info("Check SAN value in TLS certificate")
        dummy_san = "*.dummy.com"
        new_node = network.create_node("local://localhost")
        args.san = [f"dNSName:{dummy_san}"]
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == dummy_san

        LOG.info("A node started with no specified SAN defaults to public RPC host")
        dummy_public_rpc_host = "123.123.123.123"
        args.san = None
        new_node = network.create_node(f"local://localhost:0,{dummy_public_rpc_host}")
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(
            new_node.get_tls_certificate_pem(use_public_rpc_host=False)
        )
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == ipaddress.ip_address(dummy_public_rpc_host)
Example #9
def run_join_old_snapshot(args):
    txs = app.LoggingTxs("user0")
    nodes = ["local://localhost"]

    with tempfile.TemporaryDirectory() as tmp_dir:

        with infra.network.network(
            nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            network.start_and_open(args)
            primary, _ = network.find_primary()

            # First, retrieve and save one committed snapshot
            txs.issue(network, number_txs=args.snapshot_tx_interval)
            old_committed_snapshots = network.get_committed_snapshots(primary)
            copy(
                os.path.join(
                    old_committed_snapshots, os.listdir(old_committed_snapshots)[0]
                ),
                tmp_dir,
            )

            # Then generate another newer snapshot, and add two more nodes from it
            txs.issue(network, number_txs=args.snapshot_tx_interval)

            for _ in range(0, 2):
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                )
                network.trust_node(new_node, args)

            # Kill primary and wait for a new one: new primary is
            # guaranteed to have started from the new snapshot
            primary.stop()
            network.wait_for_new_primary(primary)

            # Start new node from the old snapshot
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                    snapshots_dir=tmp_dir,
                    timeout=3,
                )
            except infra.network.StartupSnapshotIsOld:
                pass
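
Note that the final try/except passes whether or not the join actually fails. A stricter variant — a sketch mirroring the assert pattern of Example #3, not part of the original test — would record the exception and require it:

            old_snapshot_error = None
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                    snapshots_dir=tmp_dir,
                    timeout=3,
                )
            except infra.network.StartupSnapshotIsOld as err:
                old_snapshot_error = err
            assert (
                old_snapshot_error is not None
            ), "Joining from an old snapshot should fail"
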
Example #10
def test_no_quote(network, args):
    untrusted_node = network.create_node("local://localhost")
    network.join_node(untrusted_node, args.package, args)
    with untrusted_node.client(
            ca=os.path.join(untrusted_node.common_dir,
                            f"{untrusted_node.local_node_id}.pem")) as uc:
        r = uc.get("/node/quotes/self")
        assert r.status_code == http.HTTPStatus.NOT_FOUND
    return network
Example #11
def run_tls_san_checks(args):
    with infra.network.network(
            args.nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
    ) as network:
        args.common_read_only_ledger_dir = None  # Reset from previous test
        network.start_and_open(args)
        network.verify_service_certificate_validity_period(
            args.initial_service_cert_validity_days)

        LOG.info("Check SAN value in TLS certificate")
        dummy_san = "*.dummy.com"
        new_node = network.create_node(
            infra.interfaces.HostSpec(
                rpc_interfaces={
                    infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                        endorsement=infra.interfaces.Endorsement(
                            authority=infra.interfaces.EndorsementAuthority.Node
                        )
                    )
                }
            )
        )
        args.subject_alt_names = [f"dNSName:{dummy_san}"]
        network.join_node(new_node, args.package, args)
        sans = infra.crypto.get_san_from_pem_cert(
            new_node.get_tls_certificate_pem())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == dummy_san

        LOG.info(
            "A node started with no specified SAN defaults to public RPC host")
        dummy_public_rpc_host = "123.123.123.123"
        args.subject_alt_names = []

        new_node = network.create_node(
            infra.interfaces.HostSpec(
                rpc_interfaces={
                    infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
                        public_host=dummy_public_rpc_host,
                        endorsement=infra.interfaces.Endorsement(
                            authority=infra.interfaces.EndorsementAuthority.Node
                        ),
                    )
                }
            )
        )
        network.join_node(new_node, args.package, args)
        # Cannot trust the node here as client cannot authenticate dummy public IP in cert
        with open(
                os.path.join(network.common_dir,
                             f"{new_node.local_node_id}.pem"),
                encoding="utf-8",
        ) as self_signed_cert:
            sans = infra.crypto.get_san_from_pem_cert(self_signed_cert.read())
        assert len(sans) == 1, "Expected exactly one SAN"
        assert sans[0].value == ipaddress.ip_address(dummy_public_rpc_host)
Example #12
def test_add_node_with_read_only_ledger(network, args):
    network.txs.issue(network, number_txs=10)
    network.txs.issue(network, number_txs=2, repeat=True)

    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node, args.package, args, from_snapshot=False, copy_ledger_read_only=True
    )
    network.trust_node(new_node, args)
    return network
Example #13
def test_new_joiner_helps_liveness(network, args):
    primary, backups = network.find_nodes()

    # Issue some transactions, so there is a ledger history that a new node must receive
    network.txs.issue(network, number_txs=10)

    # Remove a node, leaving the network frail
    network.retire_node(primary, backups[-1])
    backups[-1].stop()

    primary, backups = network.find_nodes()

    with contextlib.ExitStack() as stack:
        # Add a new node, but partition them before trusting them
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, args.package, args, from_snapshot=False)
        new_joiner_partition = [new_node]
        new_joiner_rules = stack.enter_context(
            network.partitioner.partition([primary, *backups],
                                          new_joiner_partition))

        # Trust the new node, and wait for commit of this (but don't ask the new node itself, which doesn't know this yet)
        network.trust_node(new_node, args, no_wait=True)
        check_can_progress(primary)

        # Partition the primary, temporarily creating a minority service that cannot make progress
        minority_partition = backups[len(backups) // 2:] + new_joiner_partition
        minority_rules = stack.enter_context(
            network.partitioner.partition(minority_partition))
        # This is an unusual situation, where we've actually produced a dead partitioned node.
        # Initially any write requests will timeout (failed attempt at forwarding), and then
        # the node transitions to a candidate with nobody to talk to. Rather than trying to
        # catch the errors of these states quickly, we just sleep until the latter state is
        # reached, and then confirm it was reached.
        time.sleep(network.observed_election_duration)
        with backups[0].client("user0") as c:
            r = c.post("/app/log/private", {"id": 42, "msg": "Hello world"})
            assert r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE

        # Restore the new node to the service
        new_joiner_rules.drop()

        # Confirm that the new node catches up, and progress can be made in this majority partition
        network.wait_for_new_primary(primary, minority_partition)
        check_can_progress(new_node)

        # Explicitly drop rules before continuing
        minority_rules.drop()

        network.wait_for_primary_unanimity()
        primary, _ = network.find_nodes()
        network.wait_for_all_nodes_to_commit(primary=primary)
Example #14
def test_learner_does_not_take_part(network, args):
    primary, backups = network.find_nodes()
    f_backups = backups[:network.get_f() + 1]

    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)

    with network.partitioner.partition(f_backups):

        check_does_not_progress(primary, timeout=5)

        try:
            network.consortium.trust_node(
                primary,
                new_node.node_id,
                timeout=ceil(args.join_timer * 2 / 1000),
                valid_from=str(
                    infra.crypto.datetime_to_X509time(datetime.now())),
            )
            new_node.wait_for_node_to_join(
                timeout=ceil(args.join_timer * 2 / 1000)
            )
            join_failed = False
        except Exception:
            join_failed = True

        if not join_failed:
            raise Exception("join succeeded unexpectedly")

        with new_node.client(self_signed_ok=True) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

        # New node joins, but cannot be promoted to TRUSTED without f other backups

        check_does_not_progress(primary, timeout=5)

        with new_node.client(self_signed_ok=True) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

    network.wait_for_primary_unanimity()
    primary, _ = network.find_nodes()
    network.wait_for_all_nodes_to_commit(primary=primary)
    check_can_progress(primary)
Example #15
def test_no_quote(network, args):
    untrusted_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE:
                infra.interfaces.RPCInterface(
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node))
            }))
    network.join_node(untrusted_node, args.package, args)
    with untrusted_node.client(
            ca=os.path.join(untrusted_node.common_dir,
                            f"{untrusted_node.local_node_id}.pem")) as uc:
        r = uc.get("/node/quotes/self")
        assert r.status_code == http.HTTPStatus.NOT_FOUND
    return network
Example #16
def test_add_node(network, args, from_snapshot=True):
    # Note: host is supplied explicitly to avoid having differently
    # assigned IPs for the interfaces, something which the test infra doesn't
    # support widely yet.
    operator_rpc_interface = "operator_rpc_interface"
    host = infra.net.expand_localhost()
    new_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE:
                infra.interfaces.RPCInterface(host=host),
                operator_rpc_interface:
                infra.interfaces.RPCInterface(
                    host=host,
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node),
                ),
            }))
    network.join_node(new_node,
                      args.package,
                      args,
                      from_snapshot=from_snapshot)

    # Verify self-signed node certificate validity period
    new_node.verify_certificate_validity_period(
        interface_name=operator_rpc_interface)

    network.trust_node(
        new_node,
        args,
        validity_period_days=args.maximum_node_certificate_validity_days // 2,
    )

    if not from_snapshot:
        with new_node.client() as c:
            s = c.get("/node/state")
            assert s.body.json()["node_id"] == new_node.node_id
            assert (
                s.body.json()["startup_seqno"] == 0
            ), "Node started without snapshot but reports startup seqno != 0"

    # Now that the node is trusted, verify endorsed certificate validity period
    new_node.verify_certificate_validity_period()

    return network
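
A secondary RPC interface declared by name, as above, can then be targeted when opening a client; Example #24 below uses the same pattern. A usage sketch (the endpoint and status check mirror the other examples here):

with new_node.client(interface_name=operator_rpc_interface) as c:
    r = c.get("/node/network/nodes/self")
    assert r.status_code == 200
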
Example #17
def test_join_straddling_primary_replacement(network, args):
    # We need a fourth node before we attempt the replacement, otherwise
    # we will reach a situation where two out of four nodes in the voting
    # quorum are unable to participate (one retired and one not yet joined).
    test_add_node(network, args)
    primary, _ = network.find_primary()
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args)
    proposal_body = {
        "actions": [
            {
                "name": "transition_node_to_trusted",
                "args": {
                    "node_id": new_node.node_id,
                    "valid_from": str(datetime.now()),
                },
            },
            {
                "name": "remove_node",
                "args": {
                    "node_id": primary.node_id
                },
            },
        ]
    }

    proposal = network.consortium.get_any_active_member().propose(
        primary, proposal_body)
    network.consortium.vote_using_majority(
        primary,
        proposal,
        {
            "ballot":
            "export function vote (proposal, proposer_id) { return true }"
        },
        timeout=10,
    )

    network.wait_for_new_primary(primary)
    new_node.wait_for_node_to_join(timeout=10)

    primary.stop()
    network.nodes.remove(primary)
    wait_for_reconfiguration_to_complete(network)
    return network
Example #18
def test_add_node_invalid_validity_period(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args)
    try:
        network.trust_node(
            new_node,
            args,
            validity_period_days=args.maximum_node_certificate_validity_days + 1,
        )
    except infra.proposal.ProposalNotAccepted:
        LOG.info(
            "As expected, node could not be trusted since its certificate validity period is invalid"
        )
    else:
        raise Exception(
            "Node should not be trusted if its certificate validity period is invalid"
        )
    return network
Example #19
def test_add_node_from_snapshot(
    network, args, copy_ledger_read_only=True, from_backup=False
):
    # Before adding the node from a snapshot, override at least one app entry
    # and wait for a new committed snapshot covering that entry, so that there
    # is at least one historical entry to verify.
    network.txs.issue(network, number_txs=1)
    for _ in range(1, args.snapshot_tx_interval):
        network.txs.issue(network, number_txs=1, repeat=True)
        last_tx = network.txs.get_last_tx(priv=True)
        if network.wait_for_snapshot_committed_for(seqno=last_tx[1]["seqno"]):
            break

    target_node = None
    snapshots_dir = None
    if from_backup:
        primary, target_node = network.find_primary_and_any_backup()
        # Retrieve snapshot from the primary, as only the primary
        # node generates snapshots
        snapshots_dir = network.get_committed_snapshots(primary)

    new_node = network.create_node("local://localhost")
    network.join_node(
        new_node,
        args.package,
        args,
        copy_ledger_read_only=copy_ledger_read_only,
        target_node=target_node,
        snapshots_dir=snapshots_dir,
        from_snapshot=True,
    )
    network.trust_node(new_node, args)

    with new_node.client() as c:
        r = c.get("/node/state")
        assert (
            r.body.json()["startup_seqno"] != 0
        ), "Node started from snapshot but reports startup seqno of 0"

    # Finally, verify all app entries on the new node, including historical
    # ones replayed from the copied read-only ledger
    network.txs.verify(node=new_node, include_historical=copy_ledger_read_only)

    return network
Example #20
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(new_node, args)
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args)

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)
Example #21
def test_learner_catches_up(network, args):
    primary, _ = network.find_primary()
    num_nodes_before = 0

    with primary.client() as c:
        s = c.get("/node/consensus")
        rj = s.body.json()
        # At this point, there should be exactly one configuration
        assert len(rj["details"]["configs"]) == 1
        c0 = rj["details"]["configs"][0]["nodes"]
        num_nodes_before = len(c0)

    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)
    network.trust_node(new_node, args)

    with new_node.client() as c:
        s = c.get("/node/network/nodes/self")
        rj = s.body.json()
        assert rj["status"] == "Learner" or rj["status"] == "Trusted"

    network.wait_for_node_in_store(
        primary,
        new_node.node_id,
        node_status=(ccf.ledger.NodeStatus.TRUSTED),
        timeout=3,
    )

    with primary.client() as c:
        s = c.get("/node/consensus")
        rj = s.body.json()
        assert len(rj["details"]["learners"]) == 0

        # At this point, there should be exactly one configuration, which includes the new node.
        assert len(rj["details"]["configs"]) == 1
        c0 = rj["details"]["configs"][0]["nodes"]
        assert len(c0) == num_nodes_before + 1
        assert new_node.node_id in c0

    return network
Example #22
def test_migration_2tx_reconfiguration(network,
                                       args,
                                       initial_is_1tx=True,
                                       valid_from=None,
                                       **kwargs):
    primary, _ = network.find_primary()

    # Check that the service config agrees that this is a 1tx network
    with primary.client() as c:
        s = c.get("/node/service/configuration").body.json()
        if initial_is_1tx:
            assert s["reconfiguration_type"] == "OneTransaction"

    network.consortium.submit_2tx_migration_proposal(primary)
    network.wait_for_all_nodes_to_commit(primary)

    # Check that the service config has been updated
    with primary.client() as c:
        rj = c.get("/node/service/configuration").body.json()
        assert rj["reconfiguration_type"] == "TwoTransaction"

    # Check that all nodes have updated their consensus parameters
    for node in network.nodes:
        with node.client() as c:
            rj = c.get("/node/consensus").body.json()
            assert "reconfiguration_type" in rj["details"]
            assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
            assert len(rj["details"]["learners"]) == 0

    new_node = network.create_node("local://localhost", **kwargs)
    network.join_node(new_node, args.package, args)
    network.trust_node(new_node, args, valid_from=valid_from)

    # Check that the new node has the right consensus parameter
    with new_node.client() as c:
        rj = c.get("/node/consensus").body.json()
        assert "reconfiguration_type" in rj["details"]
        assert "learners" in rj["details"]
        assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
        assert len(rj["details"]["learners"]) == 0
Example #23
def test_add_node(network, args):
    new_node = network.create_node("local://localhost")
    network.join_node(new_node, args.package, args, from_snapshot=False)

    # Verify self-signed node certificate validity period
    new_node.verify_certificate_validity_period()

    network.trust_node(
        new_node,
        args,
        validity_period_days=args.max_allowed_node_cert_validity_days // 2,
    )
    with new_node.client() as c:
        s = c.get("/node/state")
        assert s.body.json()["node_id"] == new_node.node_id
        assert (
            s.body.json()["startup_seqno"] == 0
        ), "Node started without snapshot but reports startup seqno != 0"

    # Now that the node is trusted, verify endorsed certificate validity period
    new_node.verify_certificate_validity_period()

    return network
Example #24
def test_learner_does_not_take_part(network, args):
    primary, backups = network.find_nodes()
    f_backups = backups[:network.get_f() + 1]

    # Note: host is supplied explicitly to avoid having differently
    # assigned IPs for the interfaces, something which the test infra doesn't
    # support widely yet.
    operator_rpc_interface = "operator_rpc_interface"
    host = infra.net.expand_localhost()
    new_node = network.create_node(
        infra.interfaces.HostSpec(
            rpc_interfaces={
                infra.interfaces.PRIMARY_RPC_INTERFACE:
                infra.interfaces.RPCInterface(host=host),
                operator_rpc_interface:
                infra.interfaces.RPCInterface(
                    host=host,
                    endorsement=infra.interfaces.Endorsement(
                        authority=infra.interfaces.EndorsementAuthority.Node),
                ),
            }))
    network.join_node(new_node, args.package, args, from_snapshot=False)

    LOG.info("Wait for all nodes to have committed join of new pending node")
    network.wait_for_all_nodes_to_commit(primary=primary)

    # Here, we partition a majority of backups. This is very intentional so that
    # the new learner node is not promoted to trusted while the partition is up.
    # However, this means that the isolated majority of backups can (and will)
    # elect one of them as new primary while the partition is up. When the partition
    # is lifted, all the transactions executed on the primary node (including
    # trusting the new node) will be rolled back. Because of this, we issue a new
    # trust_node proposal to make sure the new node ends up being trusted and joins
    # successfully.
    with network.partitioner.partition(f_backups):

        check_does_not_progress(primary, timeout=5)

        try:
            network.consortium.trust_node(
                primary,
                new_node.node_id,
                timeout=ceil(args.join_timer_s * 2),
                valid_from=datetime.now(),
            )
        except TimeoutError:
            LOG.info("Trust node proposal did not commit as expected")
        else:
            raise Exception("Trust node proposal committed unexpectedly")

        check_does_not_progress(primary, timeout=5)

        LOG.info("Majority partition can make progress")
        partition_primary, _ = network.wait_for_new_primary(primary,
                                                            nodes=f_backups)
        check_can_progress(partition_primary)

        LOG.info(
            "New joiner is not promoted to Trusted without f other backups")
        with new_node.client(interface_name=operator_rpc_interface,
                             verify_ca=False) as c:
            r = c.get("/node/network/nodes/self")
            assert r.body.json()["status"] == "Learner"
            r = c.get("/node/consensus")
            assert new_node.node_id in r.body.json()["details"]["learners"]

    LOG.info(
        "Partition is lifted, wait for primary unanimity on original nodes")
    # Note: Because trusting the new node failed, the new node is not considered
    # in the primary unanimity. Indeed, its transition to Trusted may have been rolled back.
    primary = network.wait_for_primary_unanimity()
    network.wait_for_all_nodes_to_commit(primary=primary)

    LOG.info("Trust new joiner again")
    network.trust_node(new_node, args)

    check_can_progress(primary)
    check_can_progress(new_node)
Example #25
def test_update_all_nodes(network, args):
    replacement_package = get_replacement_package(args)

    primary, _ = network.find_nodes()

    first_code_id = infra.utils.get_code_id(args.enclave_type, args.oe_binary,
                                            args.package)
    new_code_id = infra.utils.get_code_id(args.enclave_type, args.oe_binary,
                                          replacement_package)

    if args.enclave_type == "virtual":
        # Pretend this was already present
        network.consortium.add_new_code(primary, first_code_id)

    LOG.info("Add new code id")
    network.consortium.add_new_code(primary, new_code_id)
    with primary.client() as uc:
        r = uc.get("/node/code")
        versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
        expected = sorted(
            [
                {
                    "digest": first_code_id,
                    "status": "AllowedToJoin"
                },
                {
                    "digest": new_code_id,
                    "status": "AllowedToJoin"
                },
            ],
            key=lambda x: x["digest"],
        )
        assert versions == expected, versions

    LOG.info("Remove old code id")
    network.consortium.retire_code(primary, first_code_id)
    with primary.client() as uc:
        r = uc.get("/node/code")
        versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
        expected = sorted(
            [
                {
                    "digest": new_code_id,
                    "status": "AllowedToJoin"
                },
            ],
            key=lambda x: x["digest"],
        )
        assert versions == expected, versions

    old_nodes = network.nodes.copy()

    LOG.info("Start fresh nodes running new code")
    for _ in range(0, len(old_nodes)):
        new_node = network.create_node("local://localhost")
        network.join_node(new_node, replacement_package, args)
        network.trust_node(new_node, args)

    LOG.info("Retire original nodes running old code")
    for node in old_nodes:
        primary, _ = network.find_nodes()
        network.retire_node(primary, node)
        # Elections take (much) longer than a backup removal which is just
        # a commit, so we need to adjust our timeout accordingly, hence this branch
        if node.node_id == primary.node_id:
            new_primary, _ = network.wait_for_new_primary(primary)
            primary = new_primary
        node.stop()

    LOG.info("Check the network is still functional")
    check_can_progress(new_node)
    return network
Example #26
def run_code_upgrade_from(
    args,
    from_install_path,
    to_install_path,
    from_version=None,
    to_version=None,
    from_container_image=None,
):
    from_binary_dir, from_library_dir = get_bin_and_lib_dirs_for_install_path(
        from_install_path)
    to_binary_dir, to_library_dir = get_bin_and_lib_dirs_for_install_path(
        to_install_path)

    set_js_args(args, from_install_path, to_install_path)

    jwt_issuer = infra.jwt_issuer.JwtIssuer(
        "https://localhost", refresh_interval=args.jwt_key_refresh_interval_s)
    with jwt_issuer.start_openid_server():
        txs = app.LoggingTxs(jwt_issuer=jwt_issuer)
        with infra.network.network(
                args.nodes,
                binary_directory=from_binary_dir,
                library_directory=from_library_dir,
                pdb=args.pdb,
                txs=txs,
                jwt_issuer=jwt_issuer,
                version=from_version,
        ) as network:
            network.start_and_open(args,
                                   node_container_image=from_container_image)

            old_nodes = network.get_joined_nodes()
            primary, _ = network.find_primary()

            LOG.info("Apply transactions to old service")
            issue_activity_on_live_service(network, args)

            new_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=to_library_dir,
            )
            network.consortium.add_new_code(primary, new_code_id)

            # Note: alternate between joining from snapshot and replaying entire ledger
            new_nodes = []
            from_snapshot = True
            for _ in range(0, len(old_nodes)):
                new_node = network.create_node(
                    "local://localhost",
                    binary_dir=to_binary_dir,
                    library_dir=to_library_dir,
                    version=to_version,
                )
                network.join_node(new_node,
                                  args.package,
                                  args,
                                  from_snapshot=from_snapshot)
                network.trust_node(
                    new_node,
                    args,
                    valid_from=str(  # Pre-2.0 nodes require X509 time format
                        infra.crypto.datetime_to_X509time(
                            datetime.datetime.now())),
                )
                # For 2.x nodes joining a 1.x service before the constitution is updated,
                # the node certificate validity period is set by the joining node itself
                # as [node startup time, node startup time + 365 days]
                new_node.verify_certificate_validity_period(
                    expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS,
                    ignore_proposal_valid_from=True,
                )
                from_snapshot = not from_snapshot
                new_nodes.append(new_node)

            # Verify that all nodes run the expected CCF version
            for node in network.get_joined_nodes():
                # Note: /node/version endpoint was added in 2.x
                if not node.major_version or node.major_version > 1:
                    with node.client() as c:
                        r = c.get("/node/version")
                        expected_version = node.version or args.ccf_version
                        version = r.body.json()["ccf_version"]
                        assert (
                            version == expected_version
                        ), f"For node {node.local_node_id}, expect version {expected_version}, got {version}"

            LOG.info(
                "Apply transactions to hybrid network, with primary as old node"
            )
            issue_activity_on_live_service(network, args)

            old_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=from_library_dir,
            )
            primary, _ = network.find_primary()
            network.consortium.retire_code(primary, old_code_id)

            for index, node in enumerate(old_nodes):
                network.retire_node(primary, node)
                if primary == node:
                    primary, _ = network.wait_for_new_primary(primary)
                    # This block is here to test the transition period from a network that
                    # does not support custom claims to one that does. It can be removed after
                    # the transition is complete.
                    #
                    # The new build, being unreleased, doesn't have a version at all
                    if not primary.major_version:
                        LOG.info("Upgrade to new JS app")
                        # Upgrade to a version of the app containing an endpoint that
                        # registers custom claims
                        network.consortium.set_js_app_from_dir(
                            primary, args.new_js_app_bundle)
                        LOG.info("Run transaction with additional claim")
                        # With wait_for_sync, the client checks that all nodes, including
                        # the minority of old ones, have acked the transaction
                        msg_idx = network.txs.idx + 1
                        txid = network.txs.issue(network,
                                                 number_txs=1,
                                                 record_claim=True,
                                                 wait_for_sync=True)
                        assert len(network.txs.pub[msg_idx]) == 1
                        claims = network.txs.pub[msg_idx][-1]["msg"]

                        LOG.info(
                            "Check receipts are fine, including transaction with claims"
                        )
                        test_random_receipts(
                            network,
                            args,
                            lts=True,
                            additional_seqnos={txid.seqno: claims.encode()},
                        )
                        # Also check receipts on an old node
                        if index + 1 < len(old_nodes):
                            next_node = old_nodes[index + 1]
                            test_random_receipts(
                                network,
                                args,
                                lts=True,
                                additional_seqnos={txid.seqno: None},
                                node=next_node,
                            )
                node.stop()

            LOG.info("Service is now made of new nodes only")

            # Rollover JWKS so that new primary must read historical CA bundle table
            # and retrieve new keys via auto refresh
            if not os.getenv("CONTAINER_NODES"):
                jwt_issuer.refresh_keys()
                # Note: /gov/jwt_keys/all endpoint was added in 2.x
                primary, _ = network.find_nodes()
                if not primary.major_version or primary.major_version > 1:
                    jwt_issuer.wait_for_refresh(network)
                else:
                    time.sleep(3)
            else:
                # https://github.com/microsoft/CCF/issues/2608#issuecomment-924785744
                LOG.warning(
                    "Skipping JWT refresh as running nodes in container")

            # Code update from 1.x to 2.x requires cycling the freshly-added 2.x nodes
            # once. This is because 2.x nodes will not have an endorsed certificate
            # recorded in the store and thus will not be able to have their certificate
            # refreshed, etc.
            test_new_service(
                network,
                args,
                to_install_path,
                to_binary_dir,
                to_library_dir,
                to_version,
                cycle_existing_nodes=True,
            )

            # Check that the ledger can be parsed
            network.get_latest_ledger_public_state()
Example #27
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    all_nodes = network.get_joined_nodes()

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    # Pre-2.0 nodes require X509 time format
    valid_from = str(infra.crypto.datetime_to_X509time(
        datetime.datetime.now()))

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(
            new_node,
            args,
            valid_from=valid_from,
        )
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )
        all_nodes.append(new_node)

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args, valid_from=valid_from)
    test_service_cert_renewal(network, args, valid_from=valid_from)

    LOG.info("Waiting for retired nodes to be automatically removed")
    for node in all_nodes:
        network.wait_for_node_in_store(
            primary,
            node.node_id,
            node_status=ccf.ledger.NodeStatus.TRUSTED
            if node.is_joined() else None,
        )

    if args.check_2tx_reconfig_migration:
        test_migration_2tx_reconfiguration(
            network,
            args,
            initial_is_1tx=False,  # Reconfiguration type added in 2.x
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
            valid_from=valid_from,
        )

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)
Example #28
def run_code_upgrade_from(
    args,
    from_install_path,
    to_install_path,
    from_version=None,
    to_version=None,
):
    from_binary_dir, from_library_dir = get_bin_and_lib_dirs_for_install_path(
        from_install_path
    )
    to_binary_dir, to_library_dir = get_bin_and_lib_dirs_for_install_path(
        to_install_path
    )

    set_js_args(args, from_install_path)

    jwt_issuer = infra.jwt_issuer.JwtIssuer(
        "https://localhost", refresh_interval=args.jwt_key_refresh_interval_s
    )
    with jwt_issuer.start_openid_server():
        txs = app.LoggingTxs(jwt_issuer=jwt_issuer)
        with infra.network.network(
            args.nodes,
            binary_directory=from_binary_dir,
            library_directory=from_library_dir,
            pdb=args.pdb,
            txs=txs,
            jwt_issuer=jwt_issuer,
            version=from_version,
        ) as network:
            network.start_and_join(args)

            old_nodes = network.get_joined_nodes()
            primary, _ = network.find_primary()

            LOG.info("Apply transactions to old service")
            issue_activity_on_live_service(network, args)

            new_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=to_library_dir,
            )
            network.consortium.add_new_code(primary, new_code_id)

            # Note: alternate between joining from snapshot and replaying entire ledger
            new_nodes = []
            from_snapshot = True
            for _ in range(0, len(old_nodes)):
                new_node = network.create_node(
                    "local://localhost",
                    binary_dir=to_binary_dir,
                    library_dir=to_library_dir,
                    version=to_version,
                )
                network.join_node(
                    new_node, args.package, args, from_snapshot=from_snapshot
                )
                network.trust_node(new_node, args)
                # For 2.x nodes joining a 1.x service before the constitution is updated,
                # the node certificate validity period is set by the joining node itself
                # as [node startup time, node startup time + 365 days]
                new_node.verify_certificate_validity_period(
                    expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS,
                    ignore_proposal_valid_from=True,
                )
                from_snapshot = not from_snapshot
                new_nodes.append(new_node)

            # Verify that all nodes run the expected CCF version
            for node in network.get_joined_nodes():
                # Note: /node/version endpoint was added in 2.x
                if not node.major_version or node.major_version > 1:
                    with node.client() as c:
                        r = c.get("/node/version")
                        expected_version = node.version or args.ccf_version
                        version = r.body.json()["ccf_version"]
                        assert (
                            version == expected_version
                        ), f"For node {node.local_node_id}, expect version {expected_version}, got {version}"

            LOG.info("Apply transactions to hybrid network, with primary as old node")
            issue_activity_on_live_service(network, args)

            old_code_id = infra.utils.get_code_id(
                args.enclave_type,
                args.oe_binary,
                args.package,
                library_dir=from_library_dir,
            )
            primary, _ = network.find_primary()
            network.consortium.retire_code(primary, old_code_id)

            for node in old_nodes:
                network.retire_node(primary, node)
                if primary == node:
                    primary, _ = network.wait_for_new_primary(primary)
                node.stop()

            LOG.info("Service is now made of new nodes only")

            # Rollover JWKS so that new primary must read historical CA bundle table
            # and retrieve new keys via auto refresh
            jwt_issuer.refresh_keys()
            # Note: /gov/jwt_keys/all endpoint was added in 2.x
            primary, _ = network.find_nodes()
            if not primary.major_version or primary.major_version > 1:
                jwt_issuer.wait_for_refresh(network)
            else:
                time.sleep(3)

            # Code update from 1.x to 2.x requires cycling the freshly-added 2.x nodes
            # once. This is because 2.x nodes will not have an endorsed certificate
            # recorded in the store and thus will not be able to have their certificate
            # refreshed, etc.
            test_new_service(
                network,
                args,
                to_install_path,
                to_binary_dir,
                to_library_dir,
                to_version,
                cycle_existing_nodes=True,
            )

            # Check that the ledger can be parsed
            network.get_latest_ledger_public_state()