Code Example #1
def start_restaked(node_dir, rpc_port, config):
    if not config:
        config = load_config()
    near_root = config['near_root']
    command = [
        near_root + 'restaked', '--home=%s' % node_dir,
        '--rpc-url=127.0.0.1:%d' % rpc_port, '--wait-period=1']
    pid = subprocess.Popen(command).pid
    print("Starting restaked for %s, rpc = 0.0.0.0:%d" % (node_dir, rpc_port))
    atexit.register(atexit_stop_restaked, pid)
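A minimal invocation sketch (the home directory and port below are hypothetical; passing None for config makes the helper call load_config() itself):

import os

# Hypothetical call: launch restaked for one local node home directory and
# rely on the atexit hook registered inside start_restaked for cleanup.
start_restaked(os.path.expanduser('~/.near/test0'), 3030, None)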
Code Example #2
File: block_sync_archival.py  Project: near/nearcore
    def __init__(self):
        node_config = {
            'archive': True,
            'tracked_shards': [0],
        }

        self._config = cluster.load_config()
        self._near_root, self._node_dirs = cluster.init_cluster(
            num_nodes=1,
            num_observers=2,
            num_shards=1,
            config=self._config,
            genesis_config_changes=[['epoch_length', EPOCH_LENGTH],
                                    ['block_producer_kickout_threshold', 80]],
            client_config_changes={
                0: node_config,
                1: node_config,
                2: node_config,
                3: node_config
            })
        self._nodes = [None] * len(self._node_dirs)
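The constructor above only prepares node directories and leaves self._nodes empty. Below is a minimal sketch of starting one node on demand; the start_node name is illustrative, and the positional spin_up_node arguments follow the older convention used in the stress.py examples further down this page (newer cluster.py versions may take a boot_node keyword instead):

    # Hypothetical helper: lazily start node `ordinal` from its prepared directory.
    # Passing None for the boot key and address starts it as its own boot node;
    # for ordinal > 0 a real test would pass the boot node's key and address.
    def start_node(self, ordinal):
        if self._nodes[ordinal] is None:
            self._nodes[ordinal] = cluster.spin_up_node(
                self._config, self._near_root, self._node_dirs[ordinal],
                ordinal, None, None)
        return self._nodes[ordinal]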
Code Example #3
# Generates three epochs' worth of blocks.
# Requests the next light client block until it reaches the last final block.
# Verifies that the returned blocks are what we expect, and runs the validation on them.

import sys, time

sys.path.append('lib')

from cluster import start_cluster, load_config
from lightclient import compute_block_hash, validate_light_client_block

TIMEOUT = 150
config = load_config()
client_config_changes = {}
if not config['local']:
    client_config_changes = {
        "consensus": {
            "min_block_production_delay": {
                "secs": 4,
                "nanos": 0,
            },
            "max_block_production_delay": {
                "secs": 8,
                "nanos": 0,
            },
            "max_block_wait_delay": {
                "secs": 24,
                "nanos": 0,
            },
        }
    }
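The snippet above only prepares the overrides; a hedged sketch of how they might be passed to start_cluster (node count, shard count, and genesis values here are illustrative, not taken from the original test):

# Apply the same consensus overrides to every node; start_cluster expects a
# mapping from node index to per-node config changes.
nodes = start_cluster(
    4, 0, 1, config,
    [["epoch_length", 20], ["block_producer_kickout_threshold", 80]],
    {i: client_config_changes for i in range(4)})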
Code Example #4
File: stress.py  Project: HugoByte/nearcore
def doit(s, n, N, k, monkeys, timeout):
    global block_timeout, balances_timeout, tx_tolerance, epoch_length, wait_if_restart, wipe_data, restart_sync_timeout

    assert 2 <= n <= N

    config = load_config()
    local_config_changes = {}

    monkey_names = [x.__name__ for x in monkeys]
    proxy = None
    logging.info(monkey_names)

    for i in range(N + k + 1):
        local_config_changes[i] = {
            "consensus": {
                "block_header_fetch_horizon": BLOCK_HEADER_FETCH_HORIZON,
                "state_sync_timeout": {
                    "secs": 5,
                    "nanos": 0
                }
            },
            "view_client_throttle_period": {
                "secs": 0,
                "nanos": 0
            }
        }
    for i in range(N, N + k + 1):
        # make all the observers track all the shards
        local_config_changes[i]["tracked_shards"] = list(range(s))
    if 'monkey_wipe_data' in monkey_names:
        # When data can be wiped, the short epoch length means that, while the node whose
        # data folder was deleted is syncing, the other nodes can run far enough ahead to
        # GC the old data. Keep one archival node to address this. The archival node is
        # also needed because the balances timeout is longer, and the txs can get GCed on
        # the observer node by the time their status is checked.
        local_config_changes[N + k]['archive'] = True

    if 'monkey_local_network' in monkey_names or 'monkey_packets_drop' in monkey_names or 'monkey_node_restart' in monkey_names:
        expect_network_issues()
        block_timeout += 40

    if 'monkey_local_network' in monkey_names or 'monkey_packets_drop' in monkey_names:
        assert config['local'], 'Network stress operations only work on local nodes'
        drop_probability = 0.05 if 'monkey_packets_drop' in monkey_names else 0

        reject_list = RejectListProxy.create_reject_list(1)
        proxy = RejectListProxy(reject_list, drop_probability)
        tx_tolerance += 0.3

    if 'monkey_local_network' in monkey_names or 'monkey_packets_drop' in monkey_names:
        # add 15 seconds + 10 seconds for each unique network-related monkey
        balances_timeout += 15

        if 'monkey_local_network' in monkey_names:
            balances_timeout += 10

        if 'monkey_packets_drop' in monkey_names:
            wait_if_restart = True
            balances_timeout += 10

    if 'monkey_node_restart' in monkey_names or 'monkey_node_set' in monkey_names:
        balances_timeout += 10
        tx_tolerance += 0.5

    if 'monkey_wipe_data' in monkey_names:
        assert 'monkey_node_restart' in monkey_names or 'monkey_node_set' in monkey_names
        wipe_data = True
        balances_timeout += 25

        # if nodes can restart, we should give them way more time to sync.
        # if packets can also be dropped, each state-sync-related request or response lost adds 10 seconds
        # to the sync process.
        restart_sync_timeout = 45 if 'monkey_packets_drop' not in monkey_names else 90
        block_timeout += (10 if 'monkey_packets_drop' not in monkey_names else 40)

    # We need to make sure that the blocks that include txs are not garbage collected.
    # From the moment the first tx is sent until we check balances, up to
    # `balances_timeout * 2` seconds pass, and block production is capped at 1.7 blocks/s.
    # The GC keeps five epochs of blocks.
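    # As a concrete (hypothetical) example: with balances_timeout = 100 the window
    # is 200s, at most int(200 * 1.7) = 340 blocks fit in it, and
    # min_epoch_length = (340 + 4) // 5 = 68, so five 68-block epochs cover it.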
    min_epoch_length = (int((balances_timeout * 2) * 1.7) + 4) // 5
    epoch_length = max(epoch_length, min_epoch_length)


    near_root, node_dirs = init_cluster(
        N, k + 1, s, config,
        [["min_gas_price", 0], ["max_inflation_rate", [0, 1]],
         ["epoch_length", epoch_length],
         ["block_producer_kickout_threshold", 10],
         ["chunk_producer_kickout_threshold", 10]], local_config_changes)

    started = time.time()

    boot_node = spin_up_node(config, near_root, node_dirs[0], 0, None, None, proxy=proxy)
    boot_node.stop_checking_store()
    boot_node.mess_with = False
    nodes = [boot_node]

    for i in range(1, N + k + 1):
        node = spin_up_node(config, near_root, node_dirs[i], i,
                            boot_node.node_key.pk, boot_node.addr(), proxy=proxy)
        node.stop_checking_store()
        nodes.append(node)
        if i >= n and i < N:
            node.kill()
            node.mess_with = True
        else:
            node.mess_with = False

    stopped = Value('i', 0)
    error = Value('i', 0)
    ps = []
    nonces = [(Value('i', 1), Lock()) for _ in range(N + k + 1)]

    def launch_process(func):
        nonlocal stopped, error, ps

        p = Process(target=func, args=(stopped, error, nodes, nonces))
        p.start()
        ps.append((p, func.__name__))

    def check_errors():
        nonlocal error, ps
        if error.value != 0:
            for (p, _) in ps:
                p.terminate()
            assert False, "At least one process failed, check error messages above"

    for monkey in monkeys:
        launch_process(monkey)

    launch_process(blocks_tracker)

    started = time.time()
    while time.time() - started < timeout:
        check_errors()
        time.sleep(1)

    logging.info("")
    logging.info("==========================================")
    logging.info("# TIMEOUT IS HIT, SHUTTING DOWN THE TEST #")
    logging.info("==========================================")
    stopped.value = 1
    started_shutdown = time.time()
    proxies_stopped = False

    while True:
        check_errors()
        still_running = [name for (p, name) in ps if p.is_alive()]

        if len(still_running) == 0:
            break

        # If the test is running with proxies, `node_restart` and `node_set` can get
        # stuck because the proxies are now their child processes. We can't kill the
        # proxies right away, because that would interfere with block production, and
        # might prevent other workers (e.g. block_tracker) from completing in a timely
        # manner. Thus, kill the proxies some time into the shutdown process.
        if time.time() - started_shutdown > TIMEOUT_SHUTDOWN / 2 and not proxies_stopped:
            logging.info("Shutdown is %s seconds in, shutting down proxies if any" % (TIMEOUT_SHUTDOWN / 2))
            if boot_node.proxy is not None:
                boot_node.proxy.global_stopped.value = 1
                for p in boot_node.proxy.ps:
                    p.terminate()
            proxies_stopped = True


        if time.time() - started_shutdown > TIMEOUT_SHUTDOWN:
            for (p, _) in ps:
                p.terminate()
            assert False, "The test didn't gracefully shut down in time\nStill running: %s" % (
                still_running)

    check_errors()

    logging.info("Shut down complete, executing store validity checks")
    for node in nodes:
        node.is_check_store = True
        node.check_store()
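For reference, a hypothetical driver call for this doit variant; the monkey functions (e.g. monkey_transactions, monkey_node_restart) are assumed to be defined elsewhere in stress.py, and all values here are illustrative:

# 2 shards, 4 validators of which indices 2 and 3 may be killed/messed with,
# one observer (k = 0 still adds one observer via k + 1), 300-second run.
doit(s=2, n=2, N=4, k=0,
     monkeys=[monkey_transactions, monkey_node_restart],
     timeout=300)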
Code Example #5
def doit(s, n, N, k, monkeys, timeout):
    global block_timeout, balances_timeout, tx_tolerance

    assert 2 <= n <= N

    config = load_config()
    local_config_changes = {}

    for i in range(N, N + k + 1):
        # make all the observers track all the shards
        local_config_changes[i] = {"tracked_shards": list(range(s))}

    near_root, node_dirs = init_cluster(
        N, k + 1, s, config,
        [["min_gas_price", 0], ["max_inflation_rate", [0, 1]],
         ["epoch_length", EPOCH_LENGTH],
         ["block_producer_kickout_threshold", 10],
         ["chunk_producer_kickout_threshold", 10]], local_config_changes)

    monkey_names = [x.__name__ for x in monkeys]
    proxy = None
    logging.info(monkey_names)
    if 'monkey_local_network' in monkey_names or 'monkey_global_network' in monkey_names:
        assert config['local'], 'Network stress operations only work on local nodes'
        reject_list = RejectListProxy.create_reject_list(1)
        proxy = RejectListProxy(reject_list)
        expect_network_issues()
        block_timeout += 40
        balances_timeout += 20
        tx_tolerance += 0.3
    if 'monkey_node_restart' in monkey_names:
        expect_network_issues()
    if 'monkey_node_restart' in monkey_names or 'monkey_node_set' in monkey_names:
        block_timeout += 40
        balances_timeout += 10
        tx_tolerance += 0.5

    started = time.time()

    boot_node = spin_up_node(config,
                             near_root,
                             node_dirs[0],
                             0,
                             None,
                             None,
                             proxy=proxy)
    boot_node.stop_checking_store()
    boot_node.mess_with = False
    nodes = [boot_node]

    for i in range(1, N + k + 1):
        node = spin_up_node(config,
                            near_root,
                            node_dirs[i],
                            i,
                            boot_node.node_key.pk,
                            boot_node.addr(),
                            proxy=proxy)
        node.stop_checking_store()
        nodes.append(node)
        if i >= n and i < N:
            node.kill()
            node.mess_with = True
        else:
            node.mess_with = False

    stopped = Value('i', 0)
    error = Value('i', 0)
    ps = []
    nonces = [(Value('i', 1), Lock()) for _ in range(N + k + 1)]

    def launch_process(func):
        nonlocal stopped, error, ps

        p = Process(target=func, args=(stopped, error, nodes, nonces))
        p.start()
        ps.append((p, func.__name__))

    def check_errors():
        nonlocal error, ps
        if error.value != 0:
            for (p, _) in ps:
                p.terminate()
            assert False, "At least one process failed, check error messages above"

    for monkey in monkeys:
        launch_process(monkey)

    launch_process(blocks_tracker)

    started = time.time()
    while time.time() - started < timeout:
        check_errors()
        time.sleep(1)

    logging.info("")
    logging.info("==========================================")
    logging.info("# TIMEOUT IS HIT, SHUTTING DOWN THE TEST #")
    logging.info("==========================================")
    stopped.value = 1
    started_shutdown = time.time()
    while True:
        check_errors()
        still_running = [name for (p, name) in ps if p.is_alive()]

        if len(still_running) == 0:
            break

        if time.time() - started_shutdown > TIMEOUT_SHUTDOWN:
            for (p, _) in ps:
                p.terminate()
            assert False, "The test didn't gracefully shut down in time\nStill running: %s" % (
                still_running)

    check_errors()

    logging.info("Shut down complete, executing store validity checks")
    for node in nodes:
        node.is_check_store = True
        node.check_store()