def doit(s, n, N, k, monkeys, timeout):
    """Spin up a local cluster, run the given monkeys against it, then shut down.

    Args:
        s: Number of shards; every observer is configured to track
            shards ``0..s-1``.
        n: Nodes with index in ``[n, N)`` are killed right after start-up and
            flagged ``mess_with = True``. Must satisfy ``2 <= n <= N``.
        N: Index of the first observer node; nodes ``N..N+k`` are observers.
        k: There are ``k + 1`` observer nodes (``N + k + 1`` nodes in total).
        monkeys: Callables, each launched in its own ``Process`` with the
            arguments ``(stopped, error, nodes, nonces)``.
        timeout: Seconds to let the monkeys run before asking them to stop.

    Raises:
        AssertionError: If the arguments are inconsistent, if any worker
            process reports an error, or if the workers do not exit within
            ``TIMEOUT_SHUTDOWN`` seconds after being asked to stop.

    Side effects:
        Increments the module-level ``block_timeout``, ``balances_timeout``
        and ``tx_tolerance`` depending on which monkeys are requested.
    """
    global block_timeout, balances_timeout, tx_tolerance

    # Raised explicitly (rather than `assert`) so the check survives `-O`.
    if not 2 <= n <= N:
        raise AssertionError("expected 2 <= n <= N, got n=%s, N=%s" % (n, N))

    config = {'local': True, 'near_root': '../target/debug/'}
    local_config_changes = {}

    for i in range(N, N + k + 1):
        # make all the observers track all the shards
        local_config_changes[i] = {"tracked_shards": list(range(s))}

    # NOTE(review): confirm against init_cluster's signature that the shard
    # count (s) and the observer count (k + 1) are passed in the positions it
    # expects.
    near_root, node_dirs = init_cluster(
        N, s, k + 1, config,
        [["max_inflation_rate", 0], ["epoch_length", EPOCH_LENGTH],
         ["validator_kickout_threshold", 75]],
        local_config_changes)

    boot_node = spin_up_node(config, near_root, node_dirs[0], 0, None, None)
    boot_node.mess_with = False
    nodes = [boot_node]

    for i in range(1, N + k + 1):
        node = spin_up_node(config, near_root, node_dirs[i], i,
                            boot_node.node_key.pk, boot_node.addr())
        nodes.append(node)
        if n <= i < N:
            node.kill()
            node.mess_with = True
        else:
            node.mess_with = False

    monkey_names = [x.__name__ for x in monkeys]
    print(monkey_names)
    if ('monkey_local_network' in monkey_names or
            'monkey_global_network' in monkey_names):
        print("There are monkeys messing up with network, initializing the infra")
        init_network_pillager()
        expect_network_issues()
        block_timeout += 10
        tx_tolerance += 0.3
    if 'monkey_node_restart' in monkey_names:
        expect_network_issues()
    if ('monkey_node_restart' in monkey_names or
            'monkey_node_set' in monkey_names):
        block_timeout += 10
        balances_timeout += 10
        tx_tolerance += 0.4

    # Shared flags: `stopped` is set by this function to request shutdown;
    # a non-zero `error` is treated as "some worker failed".
    stopped = Value('i', 0)
    error = Value('i', 0)
    ps = []  # (Process, name) for every launched worker
    nonces = [(Value('i', 1), Lock()) for _ in range(N + k + 1)]

    def launch_process(func):
        """Start `func` in a new process and remember it in `ps`."""
        p = Process(target=func, args=(stopped, error, nodes, nonces))
        p.start()
        ps.append((p, func.__name__))

    def check_errors():
        """Terminate every worker and fail if any of them flagged an error."""
        if error.value != 0:
            for p, _ in ps:
                p.terminate()
            raise AssertionError(
                "At least one process failed, check error messages above")

    for monkey in monkeys:
        launch_process(monkey)

    launch_process(blocks_tracker)

    started = time.time()
    while time.time() - started < timeout:
        check_errors()
        time.sleep(1)

    print("")
    print("==========================================")
    print("# TIMEOUT IS HIT, SHUTTING DOWN THE TEST #")
    print("==========================================")
    stopped.value = 1
    started_shutdown = time.time()
    while True:
        check_errors()
        still_running = [name for (p, name) in ps if p.is_alive()]

        if not still_running:
            break

        if time.time() - started_shutdown > TIMEOUT_SHUTDOWN:
            for p, _ in ps:
                p.terminate()
            raise AssertionError(
                "The test didn't gracefully shut down in time\nStill running: %s"
                % (still_running,))

        # Poll at a short interval instead of spinning while workers wind down.
        time.sleep(0.1)

    # A worker may have flagged an error just before exiting.
    check_errors()


@stress_process
def monkey_network_hammering(stopped, error, nodes, nonces):
    """Randomly pause and resume the network of node processes until stopped.

    Every 0.5 seconds one node is picked at random and its network is
    toggled: stopped if it is currently running, resumed if it is currently
    stopped. Any network still stopped when the loop ends (normally or via an
    exception) is resumed before returning.

    Args:
        stopped: Shared value; the loop runs while ``stopped.value == 0``.
        error: Unused here; part of the common monkey signature.
        nodes: Node handles; ``nodes[i].pid.value`` is passed to
            ``stop_network`` / ``resume_network``.
        nonces: Unused here; part of the common monkey signature.
    """
    network_stopped = [False] * len(nodes)
    try:
        while stopped.value == 0:
            # randint is inclusive on both ends, so the last entry of `nodes`
            # is never selected.
            node_idx = random.randint(0, len(nodes) - 2)
            pid = nodes[node_idx].pid.value
            if network_stopped[node_idx]:
                logger.info(f"Resuming network for process {pid}")
                resume_network(pid)
            else:
                logger.info(f"Stopping network for process {pid}")
                stop_network(pid)
            network_stopped[node_idx] = not network_stopped[node_idx]

            time.sleep(0.5)
    finally:
        # Never leave a node's network paused, even if the loop raised.
        for i, is_stopped in enumerate(network_stopped):
            if is_stopped:
                pid = nodes[i].pid.value
                logger.info(f"Resuming network for process {pid}")
                resume_network(pid)


# NOTE(review): these run at import time. If the multiprocessing start method
# is ever "spawn", worker processes re-import the main module, and an
# `if __name__ == "__main__":` guard would be needed here — confirm.
expect_network_issues()
init_network_pillager()
doit(3, 3, 3, 0,
     [monkey_network_hammering, monkey_transactions_noval, monkey_staking],
     TIMEOUT)