Example #1
    def wait_for_all_nodes_to_commit(self, primary=None, tx_id=None, timeout=10):
        """
        Wait for all nodes to have joined the network and committed all transactions
        executed on the primary.
        """
        if not (primary or tx_id):
            raise ValueError("Either a valid TxID or primary node should be specified")

        end_time = time.time() + timeout

        # If no TxID is specified, retrieve latest readable one
        if tx_id is None:
            while time.time() < end_time:
                with primary.client() as c:
                    resp = c.get(
                        "/node/network/nodes/self"
                    )  # Well-known read-only endpoint
                    tx_id = TxID(resp.view, resp.seqno)
                    if tx_id.valid():
                        break
                time.sleep(0.1)
            assert (
                tx_id.valid()
            ), f"Primary {primary.node_id} has not made any progress yet ({tx_id})"

        caught_up_nodes = []
        logs = {}
        while time.time() < end_time:
            caught_up_nodes = []
            for node in self.get_joined_nodes():
                with node.client() as c:
                    logs[node.node_id] = []
                    resp = c.get(
                        f"/node/local_tx?transaction_id={tx_id}",
                        log_capture=logs[node.node_id],
                    )
                    if resp.status_code != 200:
                        # Node may not have joined the network yet, try again
                        break
                    status = TxStatus(resp.body.json()["status"])
                    if status == TxStatus.Committed:
                        caught_up_nodes.append(node)
                    elif status == TxStatus.Invalid:
                        flush_info(logs[node.node_id], None, 0)
                        raise RuntimeError(
                            f"Node {node.node_id} reports transaction ID {tx_id} is invalid and will never be committed"
                        )
                    else:
                        pass

            if len(caught_up_nodes) == len(self.get_joined_nodes()):
                break
            time.sleep(0.1)

        for lines in logs.values():
            flush_info(lines, None, 0)
        assert len(caught_up_nodes) == len(
            self.get_joined_nodes()
        ), f"Only {len(caught_up_nodes)} (out of {len(self.get_joined_nodes())}) nodes have joined the network"
Example #2
def check_can_progress(node, timeout=3):
    with node.client() as c:
        r = c.get("/node/commit")
        original_tx = TxID.from_str(r.body.json()["transaction_id"])
        with node.client("user0") as uc:
            uc.post("/app/log/private", {"id": 42, "msg": "Hello world"})
        end_time = time.time() + timeout
        while time.time() < end_time:
            current_tx = TxID.from_str(
                c.get("/node/commit").body.json()["transaction_id"])
            if current_tx.seqno > original_tx.seqno:
                return current_tx
            time.sleep(0.1)
        assert False, f"Stuck at {r}"
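This helper is used directly by Example #12 further down. A typical call site, mirroring that test (the network/backups fixtures come from the surrounding framework):

# Mirroring test_isolate_and_reconnect_primary (Example #12): after a new
# primary is elected, verify the remaining nodes still commit writes.
new_primary, _ = network.wait_for_new_primary(primary, nodes=backups)
new_tx = check_can_progress(new_primary)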
Example #3
    def get_latest_ledger_public_state(self, timeout=5):
        primary, _ = self.find_primary()
        with primary.client() as nc:
            resp = nc.get("/node/commit")
            body = resp.body.json()
            tx_id = TxID.from_str(body["transaction_id"])
        return self._get_ledger_public_view_at(
            primary, primary.get_ledger_public_state_at, tx_id.seqno, timeout
        )
Example #4
    def process_next(self):
        with self.primary.client() as client:
            rv = client.get("/node/commit")
            tx_id = TxID.from_str(rv.body.json()["transaction_id"])
            more_to_process = self.commit != tx_id.seqno
            self.commit = tx_id.seqno

            return more_to_process
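process_next returns True while the committed seqno is still moving between calls. A hedged sketch of a driver loop (the tracker name and polling interval are assumptions, not part of the original class):

# Hypothetical driver: keep polling while the committed seqno advances.
while tracker.process_next():
    time.sleep(0.1)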
Example #5
def from_requests_response(rr):
    tx_id = TxID.from_str(rr.headers.get(CCF_TX_ID_HEADER))
    return Response(
        status_code=rr.status_code,
        body=RequestsResponseBody(rr),
        seqno=tx_id.seqno,
        view=tx_id.view,
        headers=rr.headers,
    )
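CCF_TX_ID_HEADER names the response header in which CCF reports the transaction ID ("x-ms-ccf-transaction-id"). A minimal sketch of exercising this adapter with the requests library; the URL and TLS settings are placeholders:

import requests

# CCF's transaction ID response header; the URL and verify settings below
# are placeholders for illustration only.
CCF_TX_ID_HEADER = "x-ms-ccf-transaction-id"

rr = requests.get("https://127.0.0.1:8000/node/commit", verify=False)
response = from_requests_response(rr)
print(response.view, response.seqno)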
Example #6
def run(args):
    with infra.service_load.load() as load:
        with infra.network.network(
                args.nodes,
                args.binary_dir,
                args.debug_nodes,
                args.perf_nodes,
                pdb=args.pdb,
                service_load=load,
        ) as network:
            check = infra.checker.Checker()

            network.start_and_open(args)
            primary, current_view = network.find_primary()

            # Number of nodes F to stop until network cannot make progress
            nodes_to_stop = math.ceil(len(args.nodes) / 2)
            if args.consensus == "BFT":
                nodes_to_stop = math.ceil(len(args.nodes) / 3)

            primary_is_known = True
            for node_to_stop in range(nodes_to_stop):
                primary, current_view = network.find_primary()

                LOG.debug(
                    f"Commit new transactions, primary:{primary.local_node_id}, current_view:{current_view}"
                )
                with primary.client("user0") as c:
                    res = c.post(
                        "/app/log/private",
                        {
                            "id": current_view,
                            "msg": f"This log is committed in view {current_view}",
                        },
                    )
                    check(res, result=True)

                LOG.debug("Waiting for transaction to be committed by all nodes")

                network.wait_for_all_nodes_to_commit(tx_id=TxID(res.view, res.seqno))

                try:
                    test_kill_primary_no_reqs(network, args)
                except PrimaryNotFound:
                    if node_to_stop < nodes_to_stop - 1:
                        raise
                    else:
                        primary_is_known = False

            assert not primary_is_known, "Primary is still known"
            LOG.success("Test ended successfully.")
Example #7
    def last_verified_txid(self) -> TxID:
        """
        Return the :py:class:`ccf.tx_id.TxID` of the last verified signature transaction in the *parsed* ledger.

        Note: The ledger must be parsed before calling this function.

        :return: :py:class:`ccf.tx_id.TxID`
        """
        return TxID(
            self._ledger_validator.last_verified_view,
            self._ledger_validator.last_verified_seqno,
        )
Example #8
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        check = infra.checker.Checker()

        network.start_and_join(args)
        current_view = None

        # Number of nodes F to stop until network cannot make progress
        nodes_to_stop = math.ceil(len(args.nodes) / 2)
        if args.consensus == "bft":
            nodes_to_stop = math.ceil(len(args.nodes) / 3)

        primary_is_known = True
        for node_to_stop in range(nodes_to_stop):
            # Note that for the first iteration, the primary is known in advance anyway
            LOG.debug("Find freshly elected primary")
            # After a view change in bft, finding the new primary takes longer
            primary, current_view = network.find_primary(
                timeout=(30 if args.consensus == "bft" else 3)
            )

            LOG.debug(
                "Commit new transactions, primary:{}, current_view:{}".format(
                    primary.node_id, current_view
                )
            )
            with primary.client("user0") as c:
                res = c.post(
                    "/app/log/private",
                    {
                        "id": current_view,
                        "msg": "This log is committed in view {}".format(current_view),
                    },
                )
                check(res, result=True)

            LOG.debug("Waiting for transaction to be committed by all nodes")

            network.wait_for_all_nodes_to_commit(tx_id=TxID(res.view, res.seqno))

            try:
                test_kill_primary(network, args)
            except PrimaryNotFound:
                if node_to_stop < nodes_to_stop - 1:
                    raise
                else:
                    primary_is_known = False

        assert not primary_is_known, "Primary is still known"
        LOG.success("Test ended successfully.")
Example #9
def test_nobuiltins_endpoints(network, args):
    primary, backups = network.find_nodes()
    with primary.client() as c:
        r = c.get("/app/commit")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        tx_id = TxID.from_str(body_j["transaction_id"])

        r = c.get("/app/node_summary")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        assert body_j["committed_view"] == tx_id.view
        assert body_j["committed_seqno"] == tx_id.seqno
        assert body_j["quote_format"] == "OE_SGX_v1"
        assert body_j["node_id"] == primary.node_id

        r = c.get("/app/api")
        assert r.status_code == HTTPStatus.OK
        openapi_spec_validator.validate_spec(r.body.json())

        r = c.get(f"/app/tx_id?seqno={tx_id.seqno}")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        assert body_j["transaction_id"] == f"{tx_id}"

        for i in range(3):
            if i != 0:
                time.sleep(1.5)
            r = c.get("/app/current_time")
            local_time = datetime.now(timezone.utc)
            assert r.status_code == HTTPStatus.OK
            body_j = r.body.json()
            service_time = datetime.fromisoformat(body_j["timestamp"])
            diff = (local_time - service_time).total_seconds()
            # This intends to test that the reported time is "close enough"
            # to the real current time. This is dependent on the skew between
            # clocks on this executor and the target node, and the request
            # latency (including Python IO and parsing). It may need to be
            # more lenient
            assert abs(diff) < 1, diff

        r = c.get("/app/all_nodes")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        known_node_ids = [node.node_id for node in (primary, *backups)]
        for node_id, node_info in body_j["nodes"].items():
            assert (
                node_id in known_node_ids
            ), f"Response contains '{node_id}', which is not in known IDs: {known_node_ids}"
            assert node_info["quote_format"] == "OE_SGX_v1"
Example #10
    def wait_for_all_nodes_to_catch_up(self, primary, timeout=10):
        """
        Wait for all nodes to have joined the network and globally replicated
        all transactions executed on the primary (including the transactions
        which added the nodes).
        """
        end_time = time.time() + timeout
        while time.time() < end_time:
            with primary.client() as c:
                resp = c.get("/node/commit")
                body = resp.body.json()
                tx_id = TxID.from_str(body["transaction_id"])
                if tx_id.valid():
                    break
            time.sleep(0.1)
        assert (
            tx_id.valid()
        ), f"Primary {primary.node_id} has not made any progress yet ({tx_id})"

        caught_up_nodes = []
        while time.time() < end_time:
            caught_up_nodes = []
            for node in self.get_joined_nodes():
                with node.client() as c:
                    resp = c.get(f"/node/local_tx?transaction_id={tx_id}")
                    if resp.status_code != 200:
                        # Node may not have joined the network yet, try again
                        break
                    status = TxStatus(resp.body.json()["status"])
                    if status == TxStatus.Committed:
                        caught_up_nodes.append(node)
                    elif status == TxStatus.Invalid:
                        raise RuntimeError(
                            f"Node {node.node_id} reports transaction ID {tx_id} is invalid and will never be committed"
                        )
                    else:
                        pass

            if len(caught_up_nodes) == len(self.get_joined_nodes()):
                break
            time.sleep(0.1)
        assert len(caught_up_nodes) == len(
            self.get_joined_nodes()
        ), f"Only {len(caught_up_nodes)} (out of {len(self.get_joined_nodes())}) nodes have joined the network"
Example #11
    def wait_for_commit_proof(self, node, seqno, timeout=3):
        # Wait until the target seqno has a commit proof on a specific node.
        # This is achieved by first waiting for a commit over seqno, issuing
        # a write request, and then waiting for a commit over that.
        end_time = time.time() + timeout
        while time.time() < end_time:
            with node.client(self.consortium.get_any_active_member().local_id) as c:
                r = c.get("/node/commit")
                current_tx = TxID.from_str(r.body.json()["transaction_id"])
                if current_tx.seqno >= seqno:
                    # Using update_state_digest here as a convenient write tx
                    # that is app agnostic
                    r = c.post("/gov/ack/update_state_digest")
                    assert (
                        r.status_code == http.HTTPStatus.OK.value
                    ), f"Error ack/update_state_digest: {r}"
                    c.wait_for_commit(r)
                    return True
            time.sleep(0.1)
        raise TimeoutError(f"seqno {seqno} did not have commit proof after {timeout}s")
Example #12
def test_isolate_and_reconnect_primary(network, args):
    primary, backups = network.find_nodes()
    with network.partitioner.partition(backups):
        new_primary, _ = network.wait_for_new_primary(
            primary, nodes=backups, timeout_multiplier=6
        )
        new_tx = check_can_progress(new_primary)

    # Check reconnected former primary has caught up
    with primary.client() as c:
        r = c.get("/node/commit")
        timeout = 5
        end_time = time.time() + timeout
        while time.time() < end_time:
            current_tx = TxID.from_str(
                c.get("/node/commit").body.json()["transaction_id"]
            )
            if current_tx.seqno >= new_tx.seqno:
                return network
            time.sleep(0.1)
        assert False, f"Stuck at {r}"
Example #13
def test_nobuiltins_endpoints(network, args):
    primary, _ = network.find_primary()
    with primary.client() as c:
        r = c.get("/app/commit")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        tx_id = TxID.from_str(body_j["transaction_id"])

        r = c.get("/app/node_summary")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        assert body_j["committed_view"] == tx_id.view
        assert body_j["committed_seqno"] == tx_id.seqno
        assert body_j["quote_format"] == "OE_SGX_v1"

        r = c.get("/app/api")
        assert r.status_code == HTTPStatus.OK
        openapi_spec_validator.validate_spec(r.body.json())

        r = c.get(f"/app/tx_id?seqno={tx_id.seqno}")
        assert r.status_code == HTTPStatus.OK
        body_j = r.body.json()
        assert body_j["transaction_id"] == f"{tx_id}"
Example #14
    def from_raw(raw):
        # Raw is the output of curl, which is a full HTTP response.
        # But in the case of a redirect, it is multiple concatenated responses.
        # We want the final response, so we keep constructing new responses from this stream until we have reached the end
        while True:
            sock = FakeSocket(raw)
            response = HTTPResponse(sock)
            response.begin()
            response_len = sock.file.tell() + response.length
            raw_len = len(raw)
            if raw_len == response_len:
                break
            raw = raw[response_len:]

        raw_body = response.read()

        tx_id = TxID.from_str(response.getheader(CCF_TX_ID_HEADER))
        return Response(
            response.status,
            body=RawResponseBody(raw_body),
            seqno=tx_id.seqno,
            view=tx_id.view,
            headers=response.headers,
        )
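FakeSocket is the shim that lets http.client.HTTPResponse parse a raw byte buffer. Note that from_raw reads sock.file.tell(), so the shim must expose its buffer as a file attribute. A minimal sketch (an assumption, not necessarily the project's exact helper):

import io


class FakeSocket:
    # Wraps raw response bytes in a file-like object so HTTPResponse can
    # parse them; from_raw above reads .file.tell() to locate the body end.
    def __init__(self, raw):
        self.file = io.BytesIO(raw)

    def makefile(self, *args, **kwargs):
        return self.file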
Example #15
def test_view_history(network, args):
    if args.consensus == "bft":
        # This appears to work in BFT, but it is unacceptably slow:
        # - Each /tx request is a write, with a non-trivial roundtrip response time
        # - Since each read (e.g. /tx and /commit) produces writes and a unique tx ID,
        #    there are too many IDs to test exhaustively
        # We could rectify this by making this test non-exhaustive (bisecting for view changes,
        # sampling within a view), but for now it is exhaustive and Raft-only
        LOG.warning("Skipping view reconstruction in BFT")
        return network

    check = infra.checker.Checker()

    previous_node = None
    previous_tx_ids = ""
    for node in network.get_joined_nodes():
        with node.client("user0") as c:
            r = c.get("/node/commit")
            check(c)

            commit_tx_id = TxID.from_str(r.body.json()["transaction_id"])

            # Retrieve status for all possible Tx IDs
            seqno_to_views = {}
            for seqno in range(1, commit_tx_id.seqno + 1):
                views = []
                for view in range(1, commit_tx_id.view + 1):
                    r = c.get(f"/node/tx?transaction_id={view}.{seqno}", log_capture=[])
                    check(r)
                    status = TxStatus(r.body.json()["status"])
                    if status == TxStatus.Committed:
                        views.append(view)
                seqno_to_views[seqno] = views

            # Check we have exactly one Tx ID for each seqno
            txs_ok = True
            for seqno, views in seqno_to_views.items():
                if len(views) != 1:
                    txs_ok = False
                    LOG.error(
                        f"Node {node.node_id}: Found {len(views)} committed Tx IDs for seqno {seqno}"
                    )

            tx_ids_condensed = ", ".join(
                " OR ".join(f"{view}.{seqno}" for view in views or ["UNKNOWN"])
                for seqno, views in seqno_to_views.items()
            )

            if txs_ok:
                LOG.success(
                    f"Node {node.node_id}: Found a valid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
            else:
                LOG.error(
                    f"Node {node.node_id}: Invalid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
                raise RuntimeError(
                    f"Node {node.node_id}: Incomplete or inconsistent view history"
                )

            # Compare view history between nodes
            if previous_tx_ids:
                # Some nodes may have a slightly longer view history so only compare the common prefix
                min_tx_ids_len = min(len(previous_tx_ids), len(tx_ids_condensed))
                assert (
                    tx_ids_condensed[:min_tx_ids_len]
                    == previous_tx_ids[:min_tx_ids_len]
                ), f"Tx IDs don't match between node {node.node_id} and node {previous_node.node_id}: {tx_ids_condensed[:min_tx_ids_len]} and {previous_tx_ids[:min_tx_ids_len]}"

            previous_tx_ids = tx_ids_condensed
            previous_node = node

    return network
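The condensed Tx ID string built above is easiest to read against concrete (hypothetical) values: seqno 1 committed in view 2, seqno 2 ambiguous between views 2 and 3, seqno 3 unresolved:

# Illustration of the condensed format with made-up data:
seqno_to_views = {1: [2], 2: [2, 3], 3: []}
print(", ".join(
    " OR ".join(f"{view}.{seqno}" for view in views or ["UNKNOWN"])
    for seqno, views in seqno_to_views.items()
))  # -> 2.1, 2.2 OR 3.2, UNKNOWN.3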
Example #16
    def last_verified_txid(self) -> TxID:
        return TxID(self.last_verified_view, self.last_verified_seqno)
Example #17
    def issue(
        self,
        network,
        number_txs=1,
        on_backup=False,
        repeat=False,
        idx=None,
        wait_for_sync=True,
        log_capture=None,
    ):
        self.network = network
        remote_node, _ = network.find_primary(log_capture=log_capture)
        if on_backup:
            remote_node = network.find_any_backup()

        LOG.info(
            f"Applying {number_txs} logging txs to node {remote_node.local_node_id}"
        )

        with remote_node.client(self.user) as c:
            check_commit = infra.checker.Checker(c)

            for _ in range(number_txs):
                if not repeat and idx is None:
                    self.idx += 1

                target_idx = idx
                if target_idx is None:
                    target_idx = self.idx

                priv_msg = f"Private message at idx {target_idx} [{len(self.priv[target_idx])}]"
                rep_priv = c.post(
                    "/app/log/private",
                    {
                        "id": target_idx,
                        "msg": priv_msg,
                    },
                    headers=self._get_headers_base(),
                    log_capture=log_capture,
                )
                self.priv[target_idx].append({
                    "msg": priv_msg,
                    "seqno": rep_priv.seqno,
                    "view": rep_priv.view
                })

                pub_msg = (
                    f"Public message at idx {target_idx} [{len(self.pub[target_idx])}]"
                )
                rep_pub = c.post(
                    "/app/log/public",
                    {
                        "id": target_idx,
                        "msg": pub_msg,
                    },
                    headers=self._get_headers_base(),
                    log_capture=log_capture,
                )
                self.pub[target_idx].append({
                    "msg": pub_msg,
                    "seqno": rep_pub.seqno,
                    "view": rep_pub.view
                })
            if number_txs and wait_for_sync:
                check_commit(rep_pub, result=True)

        if wait_for_sync:
            network.wait_for_all_nodes_to_commit(
                tx_id=TxID(rep_pub.view, rep_pub.seqno))
Example #18
    def issue(
        self,
        network,
        number_txs=1,
        on_backup=False,
        repeat=False,
        idx=None,
        wait_for_sync=True,
        log_capture=None,
        send_private=True,
        send_public=True,
        record_claim=False,
        msg=None,
        user=None,
        url_suffix=None,
    ):
        self.network = network
        remote_node, _ = network.find_primary(log_capture=log_capture)
        if on_backup:
            remote_node = network.find_any_backup()

        LOG.info(
            f"Applying {number_txs} logging txs to node {remote_node.local_node_id}"
        )

        headers = None
        if not user:
            headers = self._get_headers_base()

        with remote_node.client(user or self.user) as c:
            check_commit = infra.checker.Checker(c)

            for _ in range(number_txs):
                if not repeat and idx is None:
                    self.idx += 1

                target_idx = idx
                if target_idx is None:
                    target_idx = self.idx

                if send_private:
                    if msg:
                        priv_msg = msg
                    else:
                        priv_msg = f"Private message at idx {target_idx} [{len(self.priv[target_idx])}]"
                    args = {"id": target_idx, "msg": priv_msg}
                    if self.scope is not None:
                        args["scope"] = self.scope
                    url = "/app/log/private"
                    if url_suffix:
                        url += "/" + url_suffix
                    if self.scope is not None:
                        url += "?scope=" + self.scope
                    rep_priv = c.post(
                        url,
                        args,
                        headers=headers,
                        log_capture=log_capture,
                    )
                    assert rep_priv.status_code == http.HTTPStatus.OK, rep_priv
                    self.priv[target_idx].append(
                        {
                            "msg": priv_msg,
                            "seqno": rep_priv.seqno,
                            "view": rep_priv.view,
                            "scope": self.scope,
                        }
                    )
                    wait_point = rep_priv

                if send_public:
                    if msg:
                        pub_msg = msg
                    else:
                        pub_msg = f"Public message at idx {target_idx} [{len(self.pub[target_idx])}]"
                    payload = {
                        "id": target_idx,
                        "msg": pub_msg,
                    }
                    url = "/app/log/public"
                    if url_suffix:
                        url += "/" + url_suffix
                    if self.scope is not None:
                        url += "?scope=" + self.scope
                    if record_claim:
                        payload["record_claim"] = True
                    rep_pub = c.post(
                        url,
                        payload,
                        headers=headers,
                        log_capture=log_capture,
                    )
                    assert rep_pub.status_code == http.HTTPStatus.OK, rep_pub
                    self.pub[target_idx].append(
                        {
                            "msg": pub_msg,
                            "seqno": rep_pub.seqno,
                            "view": rep_pub.view,
                            "scope": self.scope,
                        }
                    )
                    wait_point = rep_pub
            if number_txs and wait_for_sync:
                check_commit(wait_point, result=True)

        if wait_for_sync:
            network.wait_for_all_nodes_to_commit(
                tx_id=TxID(wait_point.view, wait_point.seqno)
            )
        return TxID(wait_point.view, wait_point.seqno)
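A hedged call-site sketch (the txs/network fixtures are assumptions from the surrounding framework): issue a batch of entries and keep the returned TxID so later steps can wait on it.

# Hypothetical call site: write 5 private/public pairs, wait for global
# commit, and keep the last TxID for later verification steps.
last_tx = txs.issue(network, number_txs=5, wait_for_sync=True)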