Example #1
0
    def _add_clients(num_of_clients, version=None, version_separator='_'):
        # TODO make a generic function that _add_clients can use
        """
        Spin up client pods in the namespace.

        :param num_of_clients: int, number of replicas
        :param version: string, the wanted client version
        :param version_separator: string, separator between the client key and its version

        :return: list, names of all created client pods
        """
        if version:
            if not isinstance(version, str):
                raise ValueError("version must be type string")

        if not setup_bootstrap.pods:
            raise Exception("Could not find bootstrap node")

        bootstrap_info = setup_bootstrap.pods[0]

        # versioned clients live under a separate config key, e.g. "client_v2"
        client_key = f'client{version_separator}{version}' if version else 'client'

        conf = get_conf(bootstrap_info, testconfig[client_key], testconfig['genesis_delta'])
        return add_multi_clients(testconfig, setup_bootstrap.deployment_id, conf, size=num_of_clients)
Example #2
0
    def _add_single_client():
        """Deploy one client pod and record its name in the module-global ``client_name``."""
        global client_name

        if not setup_bootstrap.pods:
            raise Exception("Could not find bootstrap node")

        bootstrap_info = setup_bootstrap.pods[0]
        conf = get_conf(bootstrap_info, testconfig['client'], testconfig['genesis_delta'])
        # add_multi_clients returns a list of pod names; exactly one is created here
        client_name = add_multi_clients(testconfig, setup_bootstrap.deployment_id, conf, 1)[0]
        return client_name
Example #3
0
def setup_clients_in_namespace(namespace,
                               bs_deployment_info,
                               client_deployment_info,
                               client_config,
                               genesis_time,
                               name="client",
                               file_path=None,
                               oracle=None,
                               poet=None,
                               dep_time_out=120):
    """
    Create client pods in the given namespace and record them on
    client_deployment_info.

    :param namespace: string, k8s namespace to deploy into
    :param bs_deployment_info: deployment info of the bootstrap node,
           forwarded to get_conf
    :param client_deployment_info: deployment info object whose
           deployment_name and pods fields are populated here
    :param client_config: dict, client configuration (replicas, optional
           "deployment_type", ...)
    :param genesis_time: genesis time forwarded to get_conf
    :param name: string, value of the "name" pod label used to list the
           created pods
    :param file_path: string, optional override for the spec file path
    :param oracle: optional oracle setup forwarded to get_conf
    :param poet: optional poet setup forwarded to get_conf
    :param dep_time_out: int, seconds to wait for the deployment to be ready

    :return: the populated client_deployment_info, or None if the
             specification files could not be set up
    """
    # setting stateful and deployment configuration files;
    # default deployment method is 'deployment'
    dep_method = client_config.get("deployment_type", "deployment")
    try:
        dep_file_path, ss_file_path = _setup_dep_ss_file_path(
            file_path, dep_method, 'client')
    except ValueError as e:
        print(f"error setting up client specification file: {e}")
        return None

    cspec = get_conf(bs_deployment_info,
                     client_config,
                     genesis_time,
                     setup_oracle=oracle,
                     setup_poet=poet)

    k8s_file, k8s_create_func = choose_k8s_object_create(
        client_config, dep_file_path, ss_file_path)
    resp = k8s_create_func(k8s_file,
                           namespace,
                           deployment_id=client_deployment_info.deployment_id,
                           replica_size=client_config['replicas'],
                           container_specs=cspec,
                           time_out=dep_time_out)

    # use the public accessor rather than the private `_name` attribute
    dep_name = resp.metadata.name
    client_deployment_info.deployment_name = dep_name

    client_pods = CoreV1ApiClient().list_namespaced_pod(
        namespace,
        include_uninitialized=True,
        label_selector=f"name={name}").items

    # the label selector matches every pod with this name label; keep only
    # pods that actually belong to the deployment we just created
    client_deployment_info.pods = [{'name': c.metadata.name,
                                    'pod_ip': c.status.pod_ip}
                                   for c in client_pods
                                   if c.metadata.name.startswith(dep_name)]
    return client_deployment_info
Example #4
0
def test_add_many_clients(init_session, setup_bootstrap, setup_clients):
    """Add four extra clients and verify each one logged a discovery bootstrap message."""
    bootstrap_info = setup_bootstrap.pods[0]
    conf = get_conf(bootstrap_info, testconfig['client'], testconfig['genesis_delta'])

    new_pods = add_multi_clients(testconfig, setup_bootstrap.deployment_id, conf, size=4)
    # wait for the new clients to finish bootstrap and for logs to get to elasticsearch
    time.sleep(40 * timeout_factor)
    query_fields = {'M': 'discovery_bootstrap'}
    for pod_name in new_pods:
        hits = poll_query_message(indx=current_index,
                                  namespace=testconfig['namespace'],
                                  client_po_name=pod_name,
                                  fields=query_fields,
                                  findFails=True,
                                  expected=1)
        assert len(hits) == 1, "Could not find new Client bootstrap message pod:{0}".format(pod_name)
Example #5
0
def test_add_delayed_nodes(init_session, add_curl, setup_bootstrap, start_poet, save_log_on_exit):
    """
    Start a 20-miner cluster, add 10 more miners each epoch for 4 epochs,
    wait a few more epochs, then validate hare results, layer hashes and
    the ATX count of the last epoch.
    """
    current_index = get_curr_ind()
    bs_info = setup_bootstrap.pods[0]
    cspec = get_conf(bs_info, test_config['client'], test_config['genesis_delta'], setup_oracle=None,
                     setup_poet=setup_bootstrap.pods[0]['pod_ip'])
    ns = test_config['namespace']

    layer_duration = int(test_config['client']['args']['layer-duration-sec'])
    layers_per_epoch = int(test_config['client']['args']['layers-per-epoch'])
    epoch_duration = layer_duration * layers_per_epoch

    # start with 20 miners
    start_count = 20
    new_client_in_namespace(ns, setup_bootstrap, cspec, start_count)
    sleep_and_print(epoch_duration)  # wait epoch duration

    # add 10 each epoch
    num_to_add = 10
    num_epochs_to_add_clients = 4
    clients = []
    for i in range(num_epochs_to_add_clients):
        clients.append(new_client_in_namespace(ns, setup_bootstrap, cspec, num_to_add))
        # NOTE(review): pods[i] indexes the i-th pod of the i-th batch — it only
        # works because i < num_to_add; presumably pods[0] was intended. Confirm.
        print("Added client batch ", i, clients[i].pods[i]['name'])
        sleep_and_print(epoch_duration)

    print("Done adding clients. Going to wait for two epochs")
    # wait three more epochs (the printed message above under-counts)
    wait_epochs = 3
    sleep_and_print(wait_epochs * epoch_duration)

    # total = bootstrap + first clients + added clients
    total = 1 + start_count + num_epochs_to_add_clients * num_to_add
    total_epochs = 1 + num_epochs_to_add_clients + wait_epochs  # add 1 for first epoch
    total_layers = layers_per_epoch * total_epochs
    first_layer_of_last_epoch = total_layers - layers_per_epoch
    f = int(test_config['client']['args']['hare-max-adversaries'])

    # validate
    print("Waiting 2 minutes for logs to propagate")
    sleep_and_print(120)

    print("Running validation")
    expect_hare(current_index, ns, first_layer_of_last_epoch, total_layers - 1, total, f)  # validate hare
    atx_last_epoch = query_atx_published(current_index, ns, first_layer_of_last_epoch)
    queries.assert_equal_layer_hashes(current_index, ns)
    assert len(atx_last_epoch) == total  # validate num of atxs in last epoch
Example #6
0
def test_late_bootstraps(init_session, setup_bootstrap, setup_clients):
    """Add single clients one after another and check each one still bootstraps."""
    TEST_NUM = 10
    testnames = []

    bootstrap_pod = setup_bootstrap.pods[0]
    for _ in range(TEST_NUM):
        pod_names = add_multi_clients(testconfig, setup_bootstrap.deployment_id,
                                      get_conf(bootstrap_pod, testconfig['client'],
                                               testconfig['genesis_delta']),
                                      1)
        testnames.append((pod_names[0], datetime.now()))

    # Sleep for a while so the gossip messages can propagate
    time.sleep(TEST_NUM * timeout_factor)

    query_fields = {'M': 'discovery_bootstrap'}
    for pod_name, _added_at in testnames:
        hits = poll_query_message(indx=current_index,
                                  namespace=testconfig['namespace'],
                                  client_po_name=pod_name,
                                  fields=query_fields,
                                  findFails=False,
                                  expected=1)

        assert len(hits) == 1, "Could not find new Client bootstrap message. client: {0}".format(pod_name)
Example #7
0
def test_sync_stress(init_session, setup_bootstrap, save_log_on_exit):
    """
    Bring up a cluster that downloads pre-made sync data, then add one fresh
    client (with remote data disabled) and assert it fully syncs from its
    peers within max_time_for_sync_mins minutes.
    """
    # currently the only data we have is for 2.5 days, ~700+ layers
    max_time_in_mins = 20
    max_time_for_sync_mins = max_time_in_mins

    clients_num = testconfig["client"]["replicas"]
    bs_info = setup_bootstrap.pods[0]
    cspec = get_conf(bs_info, testconfig['client'],
                     testconfig['genesis_delta'])
    _ = add_multi_clients(testconfig, init_session, cspec, clients_num)

    hits = []
    number_of_pods = clients_num + 1  # add 1 for bootstrap pod
    tts = 70
    # block until every pod (clients + bootstrap) reports finishing the download
    while len(hits) != number_of_pods:
        print(
            f"waiting for all clients to finish downloading all files, sleeping for {tts} seconds"
        )
        time.sleep(tts)
        # NOTE(review): init_session is passed both as the index and as the
        # pod-name filter — presumably this matches every pod in the session; confirm.
        hits = q.get_all_msg_containing(init_session, init_session,
                                        "Done downloading")

    # the new client must sync from peers, not from the remote data set
    del cspec.args['remote-data']
    cspec.args['data-folder'] = ""

    # Adding a single new client
    res_lst = add_multi_clients(testconfig, init_session, cspec, 1, 'client')
    new_client = res_lst[0]

    # wait for the new node to start syncing
    while True:
        start_sync_hits = q.get_all_msg_containing(init_session,
                                                   new_client,
                                                   START_SYNC,
                                                   is_print=False)
        if start_sync_hits:
            print(f"new client started syncing\n")
            break

        tts = 60
        print(f"new client did not start syncing yet sleeping for {tts} secs")
        time.sleep(tts)

    curr_try = 0
    # longest run witnessed ~18:00 minutes (12:00 minutes is the shortest), 2.5 days data, 700+ layers
    max_retries = max_time_in_mins
    interval_time = 60
    print("waiting for new client to be synced")
    # poll once per interval until the sync-done message appears or we run out of retries
    while True:
        hits = q.get_all_msg_containing(init_session,
                                        new_client,
                                        SYNC_DONE,
                                        is_print=False)
        if hits:
            print(
                f"synced after {curr_try}/{max_retries} tries of {interval_time} seconds each\n"
            )
            break

        print(
            f"not synced after {curr_try}/{max_retries} tries of {interval_time} secs each",
            end="\r")
        time.sleep(interval_time)

        curr_try += 1
        assert curr_try <= max_retries, f"node failed syncing after waiting for {max_retries} minutes"

    # There are several messages containing "start synchronize" according to Almog,
    # this is due to a bug in the sync test binary.
    # We would like the timestamp of the latest one.
    start_sync_hits = q.get_all_msg_containing(init_session,
                                               new_client,
                                               START_SYNC,
                                               is_print=False)
    last_sync_msg = start_sync_hits[-1]
    # parsing sync start time
    st = convert_ts_to_datetime(last_sync_msg["T"])
    # `hits` still holds the SYNC_DONE messages found by the loop above
    et = convert_ts_to_datetime(hits[0]["T"])

    ass_err = f"it took too long for syncing: {str(et - st)}, max {max_retries} minutes"
    passed_minutes = (et - st).seconds / 60
    assert passed_minutes < max_time_for_sync_mins, ass_err

    # total time since starting sync until finishing
    print(f"new client is synced after {str(et - st)}")
    assert 1
Example #8
0
def test_sync_gradually_add_nodes(init_session, setup_bootstrap,
                                  save_log_on_exit):
    """
    Add clients gradually (one clientv2 and three clients, staggered),
    delete the first one, then wait until every client reports sync done
    and verify layer hashes agree across nodes.
    """
    current_index = get_curr_ind()
    bs_info = setup_bootstrap.pods[0]

    gen_delt = testconfig['genesis_delta']
    cspec = get_conf(bs_info, testconfig['client'], gen_delt)
    cspec2 = get_conf(bs_info, testconfig['clientv2'], gen_delt)

    # initial batch of miners; their pod names are not needed afterwards
    _ = add_multi_clients(testconfig, init_session, cspec, 10)

    # the late joiners must sync from peers, not from remote data
    del cspec.args['remote-data']
    del cspec.args['data-folder']

    num_clients = 4
    clients = [None] * num_clients
    clients[0] = add_multi_clients(testconfig, init_session, cspec2, 1,
                                   'clientv2')[0]
    time.sleep(10)
    clients[1] = add_multi_clients(testconfig, init_session, cspec, 1,
                                   'client')[0]
    time.sleep(20)
    clients[2] = add_multi_clients(testconfig, init_session, cspec, 1,
                                   'client')[0]
    time.sleep(20)
    clients[3] = add_multi_clients(testconfig, init_session, cspec, 1,
                                   'client')[0]

    print("take pod down ", clients[0])

    delete_pod(testconfig['namespace'], clients[0])

    print("sleep for 20 sec")
    time.sleep(20)

    print("waiting for pods to be done with sync")

    start = time.time()
    sleep = 30  # seconds
    num_iter = 25  # up to 25 * 30s ~ 12.5 minutes of polling
    done = 0  # initialized here so the assert below is safe even if the loop is skipped
    for _ in range(num_iter):
        done = 0
        for pod_name in clients:
            if not check_pod_logs(pod_name, SYNC_DONE):  # not all done
                print(f"pod {pod_name} still not done. Going to sleep")
                break  # stop check and sleep
            print(f"pod {pod_name} done")
            done += 1

        if done == num_clients:
            print("all pods done")
            break

        print(f"not done yet sleep for {sleep} seconds")
        time.sleep(sleep)

    assert done == num_clients

    end = time.time()

    # the deleted-and-recreated pod must have restored its persisted data
    check_pod_logs(clients[0], PERSISTENT_DATA)
    queries.assert_equal_layer_hashes(current_index, testconfig['namespace'])

    print(f"it took {end - start} to sync all nodes with "
          f"{cspec.args['expected-layers']} layers")
    print("done!!")
Example #9
0
def test_add_node_validate_atx(init_session, setup_network):
    """
    Add a miner mid-run and, over the following epochs, validate block
    distribution per node and ATX publication, including the new miner
    once the rest of the network has learned its ATX.
    """
    curr_epoch = 0
    epochs_to_sleep = 2
    layer_duration = int(testconfig['client']['args']['layer-duration-sec'])
    layers_per_epoch = int(testconfig['client']['args']['layers-per-epoch'])
    layer_avg_size = int(testconfig['client']['args']['layer-average-size'])
    num_miners = int(testconfig['client']['replicas']) + 1  # add 1 for bs node

    print(
        f"\nlayer duration={layer_duration}, layers per epoch={layers_per_epoch}, layer avg size={layer_avg_size}"
    )
    # wait for 2 epochs
    last_layer = epochs_to_sleep * layers_per_epoch
    print(f"wait until second epoch to layer {last_layer}")
    _ = q.wait_for_latest_layer(init_session, last_layer, layers_per_epoch,
                                num_miners)

    # ========================== epoch i+2 ==========================
    curr_epoch += epochs_to_sleep
    print("\n\n-------- current epoch", curr_epoch, "--------")
    print("adding a new miner")
    bs_info = setup_network.bootstrap.pods[0]
    cspec = get_conf(bs_info, testconfig['client'],
                     testconfig['genesis_delta'])
    new_pod_name = add_multi_clients(testconfig, init_session, cspec, 1)[0]

    # wait for next epoch
    last_layer = layers_per_epoch * (curr_epoch + 1)
    print(f"wait until next epoch to layer {last_layer}")
    # num_miners + 1 accounts for the miner added above
    _ = q.wait_for_latest_layer(init_session, last_layer, layers_per_epoch,
                                num_miners + 1)

    # ========================== epoch i+3 ==========================
    curr_epoch += 1
    print("\n\n-------- current epoch", curr_epoch, "--------")
    block_map, _ = q.get_blocks_per_node_and_layer(init_session)
    print(
        f"-------- validating blocks per nodes up to layer {last_layer} --------"
    )
    # we're querying for block creation without epoch constrain, this will result
    # with epochs where new or deleted nodes will return 0 blocks in certain epochs
    # we should ignore those
    new_pod_id = get_pod_id(init_session, new_pod_name)
    ignore_lst = [new_pod_id]
    validate_blocks_per_nodes(block_map,
                              0,
                              last_layer,
                              layers_per_epoch,
                              layer_avg_size,
                              num_miners,
                              ignore_lst=ignore_lst)

    # wait an epoch
    prev_layer = last_layer
    last_layer = layers_per_epoch * (curr_epoch + 1)
    print(f"wait until next epoch to layer {last_layer}")
    _ = q.wait_for_latest_layer(init_session, last_layer, layers_per_epoch,
                                num_miners + 1)

    # ========================== epoch i+4 ==========================
    curr_epoch += 1
    print("\n\n-------- current epoch", curr_epoch, "--------")
    block_map, _ = q.get_blocks_per_node_and_layer(init_session)
    # assert that each node has created layer_avg/number_of_nodes
    print(
        f"-------- validating blocks per nodes up to layer {last_layer} --------"
    )
    # still ignoring the new miner: its ATX is not yet known network-wide
    validate_blocks_per_nodes(block_map,
                              prev_layer,
                              last_layer,
                              layers_per_epoch,
                              layer_avg_size,
                              num_miners,
                              ignore_lst=ignore_lst)

    print("-------- validating all nodes ATX creation in last epoch --------")
    atx_hits = q.query_atx_per_epoch(init_session, curr_epoch - 1)
    assert len(atx_hits) == num_miners + 1  # add 1 for new miner
    print("-------- validation succeed --------")

    last_layer = layers_per_epoch * (curr_epoch + 2)
    print(f"wait 2 epochs for layer {last_layer}")
    _ = q.wait_for_latest_layer(init_session, last_layer, layers_per_epoch,
                                num_miners + 1)

    # ========================== epoch i+6 ==========================
    curr_epoch += 2
    print("\n\n-------- current epoch", curr_epoch, "--------")
    # previous epoch all nodes are supposed to know our new node ATX
    num_miners += 1
    # assert each node has created layer_avg/number_of_nodes
    print(
        f"-------- validating blocks per nodes up to layer {last_layer} --------"
    )
    block_map, _ = q.get_blocks_per_node_and_layer(init_session)
    prev_layer = last_layer - layers_per_epoch
    validate_blocks_per_nodes(block_map, prev_layer, last_layer,
                              layers_per_epoch, layer_avg_size, num_miners)
Example #10
0
def test_unsync_while_genesis(init_session, setup_bootstrap, start_poet, add_curl):
    """
    Start a cluster, let it publish blocks, then add a late node and verify
    it starts, syncs, and (ideally) creates no blocks before finishing sync.
    """
    time_to_create_block_since_startup = 10
    time_before_first_block = testconfig["genesis_delta"] + time_to_create_block_since_startup
    layers_to_wait = 4

    layer_duration = int(testconfig['client']['args']['layer-duration-sec'])
    bs_info = setup_bootstrap.pods[0]
    cspec = get_conf(bs_info, testconfig['client'], testconfig['genesis_delta'], setup_oracle=None,
                     setup_poet=setup_bootstrap.pods[0]['pod_ip'])

    # Create a cluster of nodes
    _ = new_client_in_namespace(testconfig['namespace'], setup_bootstrap, cspec, 9)

    # Sleep to enable block creation
    print(f"sleeping for {time_before_first_block} seconds in order to enable blocks to be published\n")
    time.sleep(time_before_first_block)

    # Validate a block was published
    nodes_published_block, _ = q.get_blocks_per_node_and_layer(init_session)
    assert nodes_published_block, f"no blocks were published during the first {time_before_first_block} seconds"

    # Create a new node in cluster
    unsynced_cl = new_client_in_namespace(testconfig['namespace'], setup_bootstrap, cspec, 1)

    # Sleep until layers_to_wait layer, default is 4
    print(f"sleeping for {layer_duration * layers_to_wait} seconds\n")
    time.sleep(layer_duration * layers_to_wait)

    # Found by Almogs: "validate votes failed" error has occurred following a known bug
    print("validating no 'validate votes failed' messages has arrived")
    hits_val_failed = q.get_all_msg_containing(init_session, init_session, "validate votes failed")
    assert hits_val_failed == [], 'got a "validate votes" failed message'
    print("validation succeeded")

    # Get the msg when app started on the late node
    app_started_hits = q.get_app_started_msgs(init_session, unsynced_cl.pods[0]["name"])
    assert app_started_hits, f"app did not start for new node after {layers_to_wait} layers"

    # Check if the new node has finished syncing
    hits_synced = q.get_done_syncing_msgs(init_session, unsynced_cl.pods[0]["name"])
    assert hits_synced, f"New node did not sync, waited for {layers_to_wait} layers"

    print(f"{hits_synced[0].kubernetes.pod_name} has performed sync")

    # validate no new blocks were received before being synced:
    # query for block-creation messages in the [app started, sync done] window
    sync_ts = hits_synced[0].T
    app_started_ts = app_started_hits[0].T

    hits_msg_block = q.get_block_creation_msgs(init_session, unsynced_cl.pods[0]["name"], from_ts=app_started_ts,
                                               to_ts=sync_ts)

    # deliberately a warning, not an assertion failure
    if hits_msg_block:
        print("\n\n############ WARNING: node created blocks before syncing!!!! ############\n\n")

    hits_errors = q.find_error_log_msgs(init_session, init_session)
    if hits_errors:
        print_hits_entry_count(hits_errors, "message")
        # assert 0, "found log errors"

    print("successfully finished")
    assert 1