def test_dht_node():
    # create dht with 50 nodes + our 51st node
    dht: Dict[Endpoint, DHTID] = {}
    processes: List[mp.Process] = []

    for i in range(50):
        node_id = DHTID.generate()
        peers = random.sample(list(dht.keys()), min(len(dht), 5))
        pipe_recv, pipe_send = mp.Pipe(duplex=False)
        proc = mp.Process(target=run_node, args=(node_id, peers, pipe_send), daemon=True)
        proc.start()
        port = pipe_recv.recv()
        processes.append(proc)
        dht[f"{LOCALHOST}:{port}"] = node_id

    loop = asyncio.get_event_loop()
    me = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(list(dht.keys()), 5), parallel_rpc=10,
                                                cache_refresh_before_expiry=False))

    # test 1: find self
    nearest = loop.run_until_complete(me.find_nearest_nodes([me.node_id], k_nearest=1))[me.node_id]
    assert len(nearest) == 1 and ':'.join(nearest[me.node_id].split(':')[-2:]) == f"{LOCALHOST}:{me.port}"

    # test 2: find others
    for i in range(10):
        ref_endpoint, query_id = random.choice(list(dht.items()))
        nearest = loop.run_until_complete(me.find_nearest_nodes([query_id], k_nearest=1))[query_id]
        assert len(nearest) == 1
        found_node_id, found_endpoint = next(iter(nearest.items()))
        assert found_node_id == query_id and ':'.join(found_endpoint.split(':')[-2:]) == ref_endpoint

    # test 3: find neighbors to random nodes
    accuracy_numerator = accuracy_denominator = 0  # top-1 nearest neighbor accuracy
    jaccard_numerator = jaccard_denominator = 0  # jaccard similarity aka intersection over union
    all_node_ids = list(dht.values())

    for i in range(10):
        query_id = DHTID.generate()
        k_nearest = random.randint(1, 10)
        exclude_self = random.random() > 0.5
        nearest = loop.run_until_complete(
            me.find_nearest_nodes([query_id], k_nearest=k_nearest, exclude_self=exclude_self))[query_id]
        nearest_nodes = list(nearest)  # keys from ordered dict

        assert len(nearest_nodes) == k_nearest, "beam search must return exactly k_nearest results"
        assert me.node_id not in nearest_nodes or not exclude_self, "if exclude, results shouldn't contain self"
        assert np.all(np.diff(query_id.xor_distance(nearest_nodes)) >= 0), "results must be sorted by distance"

        ref_nearest = heapq.nsmallest(k_nearest + 1, all_node_ids, key=query_id.xor_distance)
        if exclude_self and me.node_id in ref_nearest:
            ref_nearest.remove(me.node_id)
        if len(ref_nearest) > k_nearest:
            ref_nearest.pop()

        accuracy_numerator += nearest_nodes[0] == ref_nearest[0]
        accuracy_denominator += 1
        jaccard_numerator += len(set.intersection(set(nearest_nodes), set(ref_nearest)))
        jaccard_denominator += k_nearest

    accuracy = accuracy_numerator / accuracy_denominator
    print("Top-1 accuracy:", accuracy)  # should be 98-100%
    jaccard_index = jaccard_numerator / jaccard_denominator
    print("Jaccard index (intersection over union):", jaccard_index)  # should be 95-100%
    assert accuracy >= 0.9, f"Top-1 accuracy only {accuracy} ({accuracy_numerator} / {accuracy_denominator})"
    assert jaccard_index >= 0.9, f"Jaccard index only {jaccard_index} ({jaccard_numerator} / {jaccard_denominator})"

    # test 4: find all nodes
    dummy = DHTID.generate()
    nearest = loop.run_until_complete(me.find_nearest_nodes([dummy], k_nearest=len(dht) + 100))[dummy]
    assert len(nearest) == len(dht) + 1
    assert len(set.difference(set(nearest.keys()), set(all_node_ids) | {me.node_id})) == 0

    # test 5: node without peers
    detached_node = loop.run_until_complete(DHTNode.create())
    nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy]))[dummy]
    assert len(nearest) == 1 and nearest[detached_node.node_id] == f"{LOCALHOST}:{detached_node.port}"
    nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy], exclude_self=True))[dummy]
    assert len(nearest) == 0

    # test 6: store and get value
    true_time = get_dht_time() + 1200
    assert loop.run_until_complete(me.store("mykey", ["Value", 10], true_time))
    that_guy = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(list(dht.keys()), 3),
                                                      parallel_rpc=10, cache_refresh_before_expiry=False,
                                                      cache_locally=False))

    for node in [me, that_guy]:
        val, expiration_time = loop.run_until_complete(node.get("mykey"))
        assert val == ["Value", 10], "Wrong value"
        assert expiration_time == true_time, "Wrong time"

    assert loop.run_until_complete(detached_node.get("mykey")) is None

    # test 7: bulk store and bulk get
    keys = 'foo', 'bar', 'baz', 'zzz'
    values = 3, 2, 'batman', [1, 2, 3]
    store_ok = loop.run_until_complete(me.store_many(keys, values, expiration_time=get_dht_time() + 999))
    assert all(store_ok.values()), "failed to store one or more keys"
    response = loop.run_until_complete(me.get_many(keys[::-1]))
    for key, value in zip(keys, values):
        assert key in response and response[key][0] == value

    # test 8: store dictionaries as values (with sub-keys)
    upper_key, subkey1, subkey2, subkey3 = 'ololo', 'k1', 'k2', 'k3'
    now = get_dht_time()
    assert loop.run_until_complete(me.store(upper_key, subkey=subkey1, value=123, expiration_time=now + 10))
    assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=456, expiration_time=now + 20))
    for node in [that_guy, me]:
        value, time = loop.run_until_complete(node.get(upper_key))
        assert isinstance(value, dict) and time == now + 20
        assert value[subkey1] == (123, now + 10)
        assert value[subkey2] == (456, now + 20)
        assert len(value) == 2

    assert not loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=345, expiration_time=now + 10))
    assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=567, expiration_time=now + 30))
    assert loop.run_until_complete(me.store(upper_key, subkey=subkey3, value=890, expiration_time=now + 50))
    loop.run_until_complete(asyncio.sleep(0.1))  # wait for cache to refresh

    for node in [that_guy, me]:
        value, time = loop.run_until_complete(node.get(upper_key))
        assert isinstance(value, dict) and time == now + 50, (value, time)
        assert value[subkey1] == (123, now + 10)
        assert value[subkey2] == (567, now + 30)
        assert value[subkey3] == (890, now + 50)
        assert len(value) == 3

    for proc in processes:
        proc.terminate()
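
# test_dht_node spawns its 50 background peers via run_node, which is not defined in this
# section. The sketch below is a minimal reconstruction based on how it is called above,
# assuming DHTNode.create accepts node_id and initial_peers and exposes the chosen port
# as node.port (as the assertions above imply); the real helper may differ.
def run_node(node_id, peers, pipe_send):
    # each peer lives in its own process, so give it its own event loop
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    node = loop.run_until_complete(DHTNode.create(node_id=node_id, initial_peers=peers))
    pipe_send.send(node.port)  # report the chosen port back to the parent process
    loop.run_forever()  # keep serving DHT requests until the parent terminates this process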
def test_dht_protocol():
    # create the first peer
    peer1_port, peer1_id, peer1_started = hivemind.find_open_port(), DHTID.generate(), mp.Event()
    peer1_proc = mp.Process(target=run_protocol_listener, args=(peer1_port, peer1_id, peer1_started), daemon=True)
    peer1_proc.start(), peer1_started.wait()

    # create another peer that connects to the first peer
    peer2_port, peer2_id, peer2_started = hivemind.find_open_port(), DHTID.generate(), mp.Event()
    peer2_proc = mp.Process(target=run_protocol_listener, args=(peer2_port, peer2_id, peer2_started),
                            kwargs={'ping': f'{LOCALHOST}:{peer1_port}'}, daemon=True)
    peer2_proc.start(), peer2_started.wait()

    loop = asyncio.get_event_loop()
    for listen in [False, True]:  # note: order matters, this test assumes that first run uses listen=False
        protocol = loop.run_until_complete(DHTProtocol.create(
            DHTID.generate(), bucket_size=20, depth_modulo=5, wait_timeout=5, num_replicas=3, listen=listen))
        print(f"Self id={protocol.node_id}", flush=True)

        assert loop.run_until_complete(protocol.call_ping(f'{LOCALHOST}:{peer1_port}')) == peer1_id

        key, value, expiration = DHTID.generate(), [random.random(), {'ololo': 'pyshpysh'}], get_dht_time() + 1e3
        store_ok = loop.run_until_complete(protocol.call_store(
            f'{LOCALHOST}:{peer1_port}', [key], [hivemind.MSGPackSerializer.dumps(value)], expiration))
        assert all(store_ok), "DHT rejected a trivial store"

        # peer 1 must know about peer 2
        (recv_value_bytes, recv_expiration), nodes_found = loop.run_until_complete(
            protocol.call_find(f'{LOCALHOST}:{peer1_port}', [key]))[key]
        recv_value = hivemind.MSGPackSerializer.loads(recv_value_bytes)
        (recv_id, recv_endpoint) = next(iter(nodes_found.items()))
        assert recv_id == peer2_id and ':'.join(recv_endpoint.split(':')[-2:]) == f"{LOCALHOST}:{peer2_port}", \
            f"expected id={peer2_id}, peer={LOCALHOST}:{peer2_port} but got {recv_id}, {recv_endpoint}"
        assert recv_value == value and recv_expiration == expiration, \
            f"call_find_value expected {value} (expires by {expiration}) " \
            f"but got {recv_value} (expires by {recv_expiration})"

        # peer 2 must know about peer 1, but not have a *random* nonexistent value
        dummy_key = DHTID.generate()
        empty_item, nodes_found_2 = loop.run_until_complete(
            protocol.call_find(f'{LOCALHOST}:{peer2_port}', [dummy_key]))[dummy_key]
        assert empty_item is None, "Non-existent keys shouldn't have values"
        (recv_id, recv_endpoint) = next(iter(nodes_found_2.items()))
        assert recv_id == peer1_id and recv_endpoint == f"{LOCALHOST}:{peer1_port}", \
            f"expected id={peer1_id}, peer={LOCALHOST}:{peer1_port} but got {recv_id}, {recv_endpoint}"

        # cause a non-response by querying a nonexistent peer
        dummy_port = hivemind.find_open_port()
        assert loop.run_until_complete(protocol.call_find(f"{LOCALHOST}:{dummy_port}", [key])) is None

        # store/get a dictionary with sub-keys
        nested_key, subkey1, subkey2 = DHTID.generate(), 'foo', 'bar'
        value1, value2 = [random.random(), {'ololo': 'pyshpysh'}], 'abacaba'
        assert loop.run_until_complete(protocol.call_store(
            f'{LOCALHOST}:{peer1_port}', keys=[nested_key], values=[hivemind.MSGPackSerializer.dumps(value1)],
            expiration_time=[expiration], subkeys=[subkey1]))
        assert loop.run_until_complete(protocol.call_store(
            f'{LOCALHOST}:{peer1_port}', keys=[nested_key], values=[hivemind.MSGPackSerializer.dumps(value2)],
            expiration_time=[expiration + 5], subkeys=[subkey2]))
        (recv_dict, recv_expiration), nodes_found = loop.run_until_complete(
            protocol.call_find(f'{LOCALHOST}:{peer1_port}', [nested_key]))[nested_key]
        assert isinstance(recv_dict, DictionaryDHTValue)
        assert len(recv_dict.data) == 2 and recv_expiration == expiration + 5
        assert recv_dict.data[subkey1] == (protocol.serializer.dumps(value1), expiration)
        assert recv_dict.data[subkey2] == (protocol.serializer.dumps(value2), expiration + 5)

        assert LOCALHOST in loop.run_until_complete(
            protocol.get_outgoing_request_endpoint(f'{LOCALHOST}:{peer1_port}'))

        if listen:
            loop.run_until_complete(protocol.shutdown())

    peer1_proc.terminate()
    peer2_proc.terminate()
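
# run_protocol_listener is likewise defined elsewhere; this is a minimal reconstruction
# based on how it is called above. It assumes DHTProtocol.create can bind to a specific
# port via a listen_on endpoint (the test body above only uses the listen= flag), so the
# real helper may differ in details.
def run_protocol_listener(port, dhtid, started, ping=None):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    protocol = loop.run_until_complete(DHTProtocol.create(
        dhtid, bucket_size=20, depth_modulo=5, num_replicas=3, wait_timeout=5,
        listen_on=f"{LOCALHOST}:{port}"))
    if ping is not None:
        loop.run_until_complete(protocol.call_ping(ping))  # introduce ourselves to an existing peer
    started.set()  # signal the parent process that the listener is ready
    loop.run_forever()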
def _tester():
    # note: we run everything in a separate process to re-initialize all global states from scratch
    # this helps us avoid undesirable side-effects when running multiple tests in sequence
    loop = asyncio.get_event_loop()
    for listen in [False, True]:  # note: order matters, this test assumes that first run uses listen=False
        protocol = loop.run_until_complete(DHTProtocol.create(
            DHTID.generate(), bucket_size=20, depth_modulo=5, wait_timeout=5, num_replicas=3, listen=listen))
        print(f"Self id={protocol.node_id}", flush=True)

        assert loop.run_until_complete(protocol.call_ping(f'{LOCALHOST}:{peer1_port}')) == peer1_id

        key, value, expiration = DHTID.generate(), [random.random(), {'ololo': 'pyshpysh'}], get_dht_time() + 1e3
        store_ok = loop.run_until_complete(protocol.call_store(
            f'{LOCALHOST}:{peer1_port}', [key], [hivemind.MSGPackSerializer.dumps(value)], expiration))
        assert all(store_ok), "DHT rejected a trivial store"

        # peer 1 must know about peer 2
        recv_value_bytes, recv_expiration, nodes_found = loop.run_until_complete(
            protocol.call_find(f'{LOCALHOST}:{peer1_port}', [key]))[key]
        recv_value = hivemind.MSGPackSerializer.loads(recv_value_bytes)
        (recv_id, recv_endpoint) = next(iter(nodes_found.items()))
        assert recv_id == peer2_id and ':'.join(recv_endpoint.split(':')[-2:]) == f"{LOCALHOST}:{peer2_port}", \
            f"expected id={peer2_id}, peer={LOCALHOST}:{peer2_port} but got {recv_id}, {recv_endpoint}"
        assert recv_value == value and recv_expiration == expiration, \
            f"call_find_value expected {value} (expires by {expiration}) " \
            f"but got {recv_value} (expires by {recv_expiration})"

        # peer 2 must know about peer 1, but not have a *random* nonexistent value
        dummy_key = DHTID.generate()
        recv_dummy_value, recv_dummy_expiration, nodes_found_2 = loop.run_until_complete(
            protocol.call_find(f'{LOCALHOST}:{peer2_port}', [dummy_key]))[dummy_key]
        assert recv_dummy_value is None and recv_dummy_expiration is None, "Non-existent keys shouldn't have values"
        (recv_id, recv_endpoint) = next(iter(nodes_found_2.items()))
        assert recv_id == peer1_id and recv_endpoint == f"{LOCALHOST}:{peer1_port}", \
            f"expected id={peer1_id}, peer={LOCALHOST}:{peer1_port} but got {recv_id}, {recv_endpoint}"

        # cause a non-response by querying a nonexistent peer
        dummy_port = hivemind.find_open_port()
        assert loop.run_until_complete(protocol.call_find(f"{LOCALHOST}:{dummy_port}", [key])) is None

        if listen:
            loop.run_until_complete(protocol.shutdown())

    print("DHTProtocol test finished successfully!")
    test_success.set()
def test_get_empty():
    d = LocalStorage()
    assert d.get(DHTID.generate(source="key")) == (None, None), "LocalStorage returned non-existent value"
    print("Test get empty passed")
def test_store():
    d = LocalStorage()
    d.store(DHTID.generate("key"), b"val", get_dht_time() + 0.5)
    assert d.get(DHTID.generate("key"))[0] == b"val", "Wrong value"
    print("Test store passed")
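
# test_store writes a value that expires in 0.5 seconds; a natural companion is a check
# that expired entries are no longer returned. This is a hypothetical sketch, not part of
# the original snippet, assuming LocalStorage stops returning entries once their
# expiration time passes (requires `import time`):
def test_get_expired():
    d = LocalStorage()
    d.store(DHTID.generate("key"), b"val", get_dht_time() + 0.1)
    time.sleep(0.5)  # let the entry expire
    assert d.get(DHTID.generate("key")) == (None, None), "LocalStorage returned an expired value"
    print("Test get expired passed")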
def _tester():
    # note: we run everything in a separate process to re-initialize all global states from scratch
    # this helps us avoid undesirable side-effects when running multiple tests in sequence
    loop = asyncio.get_event_loop()
    me = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(list(dht.keys()), 5), parallel_rpc=10))

    # test 1: find self
    nearest = loop.run_until_complete(me.find_nearest_nodes([me.node_id], k_nearest=1))[me.node_id]
    assert len(nearest) == 1 and ':'.join(nearest[me.node_id].split(':')[-2:]) == f"{LOCALHOST}:{me.port}"

    # test 2: find others
    for i in range(10):
        ref_endpoint, query_id = random.choice(list(dht.items()))
        nearest = loop.run_until_complete(me.find_nearest_nodes([query_id], k_nearest=1))[query_id]
        assert len(nearest) == 1
        found_node_id, found_endpoint = next(iter(nearest.items()))
        assert found_node_id == query_id and ':'.join(found_endpoint.split(':')[-2:]) == ref_endpoint

    # test 3: find neighbors to random nodes
    accuracy_numerator = accuracy_denominator = 0  # top-1 nearest neighbor accuracy
    jaccard_numerator = jaccard_denominator = 0  # jaccard similarity aka intersection over union
    all_node_ids = list(dht.values())

    for i in range(100):
        query_id = DHTID.generate()
        k_nearest = random.randint(1, 20)
        exclude_self = random.random() > 0.5
        nearest = loop.run_until_complete(
            me.find_nearest_nodes([query_id], k_nearest=k_nearest, exclude_self=exclude_self))[query_id]
        nearest_nodes = list(nearest)  # keys from ordered dict

        assert len(nearest_nodes) == k_nearest, "beam search must return exactly k_nearest results"
        assert me.node_id not in nearest_nodes or not exclude_self, "if exclude, results shouldn't contain self"
        assert np.all(np.diff(query_id.xor_distance(nearest_nodes)) >= 0), "results must be sorted by distance"

        ref_nearest = heapq.nsmallest(k_nearest + 1, all_node_ids, key=query_id.xor_distance)
        if exclude_self and me.node_id in ref_nearest:
            ref_nearest.remove(me.node_id)
        if len(ref_nearest) > k_nearest:
            ref_nearest.pop()

        accuracy_numerator += nearest_nodes[0] == ref_nearest[0]
        accuracy_denominator += 1
        jaccard_numerator += len(set.intersection(set(nearest_nodes), set(ref_nearest)))
        jaccard_denominator += k_nearest

    accuracy = accuracy_numerator / accuracy_denominator
    print("Top-1 accuracy:", accuracy)  # should be 98-100%
    jaccard_index = jaccard_numerator / jaccard_denominator
    print("Jaccard index (intersection over union):", jaccard_index)  # should be 95-100%
    assert accuracy >= 0.9, f"Top-1 accuracy only {accuracy} ({accuracy_numerator} / {accuracy_denominator})"
    assert jaccard_index >= 0.9, f"Jaccard index only {jaccard_index} ({jaccard_numerator} / {jaccard_denominator})"

    # test 4: find all nodes
    dummy = DHTID.generate()
    nearest = loop.run_until_complete(me.find_nearest_nodes([dummy], k_nearest=len(dht) + 100))[dummy]
    assert len(nearest) == len(dht) + 1
    assert len(set.difference(set(nearest.keys()), set(all_node_ids) | {me.node_id})) == 0

    # test 5: node without peers
    other_node = loop.run_until_complete(DHTNode.create())
    nearest = loop.run_until_complete(other_node.find_nearest_nodes([dummy]))[dummy]
    assert len(nearest) == 1 and nearest[other_node.node_id] == f"{LOCALHOST}:{other_node.port}"
    nearest = loop.run_until_complete(other_node.find_nearest_nodes([dummy], exclude_self=True))[dummy]
    assert len(nearest) == 0

    # test 6: store and get value
    true_time = get_dht_time() + 1200
    assert loop.run_until_complete(me.store("mykey", ["Value", 10], true_time))
    for node in [me, other_node]:
        val, expiration_time = loop.run_until_complete(me.get("mykey"))
        assert expiration_time == true_time, "Wrong time"
        assert val == ["Value", 10], "Wrong value"

    # test 7: bulk store and bulk get
    keys = 'foo', 'bar', 'baz', 'zzz'
    values = 3, 2, 'batman', [1, 2, 3]
    store_ok = loop.run_until_complete(me.store_many(keys, values, expiration_time=get_dht_time() + 999))
    assert all(store_ok.values()), "failed to store one or more keys"
    response = loop.run_until_complete(me.get_many(keys[::-1]))
    for key, value in zip(keys, values):
        assert key in response and response[key][0] == value

    test_success.set()
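
# Both _tester functions read dht, peer endpoints and test_success from an enclosing
# scope and, per their own comments, are meant to run in a separate process. A sketch of
# the implied launch pattern, assuming test_success is an mp.Event shared with the child
# via fork (the exact wrapper is not shown in this section):
test_success = mp.Event()
tester = mp.Process(target=_tester, daemon=True)
tester.start()
tester.join()
assert test_success.is_set(), "tester process did not finish successfully"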