def get_experts(self, uids, expiration_time: Optional[DHTExpiration] = None,
                return_future: bool = False) -> List[Optional[RemoteExpert]]:
    logger.warning("dht.get_experts is scheduled for removal in 0.9.8, please use hivemind.get_experts.")
    return hivemind.get_experts(self, uids, expiration_time, return_future)
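
# Migration sketch (hypothetical helper, not part of the original module): the method above
# is a thin deprecation shim, so the two calls below are equivalent; the module-level
# hivemind.get_experts is the one that will survive past 0.9.8.
def _example_get_experts_migration(dht: 'hivemind.DHT') -> None:
    experts_old = dht.get_experts(['expert.0', 'expert.1'])            # deprecated, logs a warning
    experts_new = hivemind.get_experts(dht, ['expert.0', 'expert.1'])  # preferred replacement
    assert [e.uid if e else None for e in experts_old] == [e.uid if e else None for e in experts_new]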
def test_store_get_experts():
    # build a small swarm: each new peer bootstraps from up to 3 random existing peers
    peers = [hivemind.DHT(start=True)]
    for _ in range(10):
        neighbors_i = [f'{LOCALHOST}:{node.port}' for node in random.sample(peers, min(3, len(peers)))]
        peers.append(hivemind.DHT(initial_peers=neighbors_i, start=True))

    first_peer = random.choice(peers)
    other_peer = random.choice(peers)

    # declare 50 experts from one peer, in batches of 10
    expert_uids = [f"my_expert.{i}" for i in range(50)]
    batch_size = 10
    for batch_start in range(0, len(expert_uids), batch_size):
        hivemind.declare_experts(first_peer, expert_uids[batch_start: batch_start + batch_size], 'localhost:1234')

    # a different peer should find all declared experts, but not the bogus uids
    found = get_experts(other_peer, random.sample(expert_uids, 5) + ['foo', 'bar'])
    assert all(res is not None for res in found[:-2]), "Could not find some existing experts"
    assert all(res is None for res in found[-2:]), "Found non-existing experts"

    other_expert, other_port = "my_other_expert.1337", random.randint(1000, 9999)
    hivemind.declare_experts(other_peer, [other_expert], f'that_host:{other_port}')
    first_notfound, first_found = get_experts(first_peer, ['foobar', other_expert])
    assert first_notfound is None, "Found a non-existing expert"
    assert isinstance(first_found, hivemind.RemoteExpert)
    assert first_found.endpoint == f'that_host:{other_port}'

    for peer in peers:
        peer.shutdown()
def test_dht_single_node():
    node = hivemind.DHT(start=True)
    beam_search = MoEBeamSearcher(node, 'expert.', grid_size=(10,))

    assert all(declare_experts(node, ['expert.1', 'expert.2', 'expert.3'],
                               f"{hivemind.LOCALHOST}:1337").values())
    assert len(declare_experts(node, ["ffn.1", "ffn.2"], endpoint="that_place")) == 4
    assert len(declare_experts(node, ['e.1.2.3', 'e.1.2.5', 'e.2.0'], f"{hivemind.LOCALHOST}:42")) == 7

    for expert in get_experts(node, ['expert.3', 'expert.2']):
        assert expert.endpoint == f"{hivemind.LOCALHOST}:1337"

    assert all(declare_experts(node, ['expert.5', 'expert.2'], f"{hivemind.LOCALHOST}:1337").values())
    found_experts = beam_search.find_best_experts([(0., 1., 2., 3., 4., 5., 6., 7., 8.)], beam_size=2)
    assert len(found_experts) == 2 and [expert.uid for expert in found_experts] == ['expert.5', 'expert.3']

    # active successors: for each prefix, the declared next-level coordinates and their endpoints
    successors = beam_search.get_active_successors(['e.1.2.', 'e.2.', 'e.4.5.'])
    assert len(successors['e.1.2.']) == 2
    assert successors['e.1.2.'][3] == UidEndpoint('e.1.2.3', f'{LOCALHOST}:42')
    assert successors['e.1.2.'][5] == UidEndpoint('e.1.2.5', f'{LOCALHOST}:42')
    assert len(successors['e.2.']) == 1 and successors['e.2.'][0] == UidEndpoint('e.2.0', f'{LOCALHOST}:42')
    assert successors['e.4.5.'] == {}

    initial_beam = beam_search.get_initial_beam((3, 2, 1, 0, -1, -2, -3), beam_size=3)
    assert len(initial_beam) == 3
    assert initial_beam[0][:2] == (2.0, 'expert.1.')
    assert initial_beam[1][:2] == (1.0, 'expert.2.')
    assert initial_beam[2][:2] == (0.0, 'expert.3.')

    with pytest.raises(AssertionError):
        beam_search = MoEBeamSearcher(node, 'expert.1.ffn', (2, 2))

    with pytest.raises(AssertionError):
        beam_search.get_active_successors(['e.1.2.', 'e.2', 'e.4.5.'])
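
# Why the beam search above returns ['expert.5', 'expert.3']: a sketch of the scoring rule
# as I understand it (an expert's score is the sum of its per-dimension grid scores; for a
# one-dimensional grid this is simply scores[index]). This helper is illustrative and not
# part of the hivemind API.
def _expected_best_experts(scores, declared_indices, beam_size):
    # rank the declared grid indices by their score, highest first
    ranked = sorted(declared_indices, key=lambda idx: scores[idx], reverse=True)
    return [f'expert.{idx}' for idx in ranked[:beam_size]]

# _expected_best_experts((0., 1., 2., 3., 4., 5., 6., 7., 8.), [1, 2, 3, 5], beam_size=2)
# -> ['expert.5', 'expert.3'], matching the assertion in test_dht_single_node.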
def benchmark_dht(num_peers: int, initial_peers: int, num_experts: int, expert_batch_size: int,
                  random_seed: int, wait_after_request: float, wait_before_read: float,
                  wait_timeout: float, expiration: float):
    random.seed(random_seed)

    print("Creating peers...")
    peers = []
    for _ in trange(num_peers):
        neighbors = [f'0.0.0.0:{node.port}' for node in random.sample(peers, min(initial_peers, len(peers)))]
        peer = hivemind.DHT(initial_peers=neighbors, start=True, wait_timeout=wait_timeout,
                            listen_on='0.0.0.0:*')
        peers.append(peer)

    store_peer, get_peer = peers[-2:]
    expert_uids = list(set(f"expert.{random.randint(0, 999)}.{random.randint(0, 999)}.{random.randint(0, 999)}"
                           for _ in range(num_experts)))
    print(f"Sampled {len(expert_uids)} unique ids (after deduplication)")
    random.shuffle(expert_uids)

    print(f"Storing experts to dht in batches of {expert_batch_size}...")
    successful_stores = total_stores = total_store_time = 0
    benchmark_started = time.perf_counter()
    endpoints = []

    for start in trange(0, num_experts, expert_batch_size):
        store_start = time.perf_counter()
        endpoints.append(random_endpoint())
        store_ok = hivemind.declare_experts(store_peer, expert_uids[start: start + expert_batch_size],
                                            endpoints[-1], expiration=expiration)
        successes = store_ok.values()
        total_store_time += time.perf_counter() - store_start

        total_stores += len(successes)
        successful_stores += sum(successes)
        time.sleep(wait_after_request)

    print(f"Store success rate: {successful_stores / total_stores * 100:.1f}% "
          f"({successful_stores} / {total_stores})")
    print(f"Mean store time: {total_store_time / total_stores:.5f}, Total: {total_store_time:.5f}")
    time.sleep(wait_before_read)

    if time.perf_counter() - benchmark_started > expiration:
        logger.warning("All keys expired before benchmark started getting them. "
                       "Consider increasing expiration_time")

    successful_gets = total_get_time = 0

    for start in trange(0, len(expert_uids), expert_batch_size):
        get_start = time.perf_counter()
        get_result = hivemind.get_experts(get_peer, expert_uids[start: start + expert_batch_size])
        total_get_time += time.perf_counter() - get_start

        # a get counts as successful only if the uid and endpoint match what was stored
        for i, expert in enumerate(get_result):
            if expert is not None and expert.uid == expert_uids[start + i] \
                    and expert.endpoint == endpoints[start // expert_batch_size]:
                successful_gets += 1

    if time.perf_counter() - benchmark_started > expiration:
        logger.warning("keys expired midway during get requests. "
                       "If that isn't desired, increase expiration_time param")

    print(f"Get success rate: {successful_gets / len(expert_uids) * 100:.1f}% "
          f"({successful_gets} / {len(expert_uids)})")
    print(f"Mean get time: {total_get_time / len(expert_uids):.5f}, Total: {total_get_time:.5f}")

    alive_peers = [peer.is_alive() for peer in peers]
    print(f"Node survival rate: {sum(alive_peers) / len(peers) * 100:.3f}%")
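
# Minimal invocation sketch (not part of the original benchmark): runs a tiny configuration
# of benchmark_dht so the parameters above have a concrete reference point. All values below
# are illustrative assumptions; real benchmarks would use far more peers and experts.
if __name__ == '__main__':
    benchmark_dht(num_peers=16, initial_peers=2, num_experts=64, expert_batch_size=8,
                  random_seed=42, wait_after_request=0.0, wait_before_read=0.5,
                  wait_timeout=5.0, expiration=300.0)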
def generate_uids_from_pattern(num_experts: int, expert_pattern: Optional[str], dht: Optional[DHT] = None,
                               attempts_per_expert=10) -> List[str]:
    """
    Sample expert uids from a given pattern, removing duplicates.

    :param num_experts: sample this many unique expert uids
    :param expert_pattern: a string pattern, e.g. myprefix.[0:32].[0:256] means "sample random
     experts between myprefix.0.0 and myprefix.31.255" (each [from:to] block excludes `to`)
    :param dht: if specified, uses this DHT to check that expert uids are not yet occupied by other peers
    :param attempts_per_expert: give up if unable to generate a new expert uid after this many attempts per uid
    :note: this method is not strictly process-safe. If several servers run it concurrently,
     they have a small chance of sampling duplicate expert uids.
    """
    remaining_attempts = attempts_per_expert * num_experts
    found_uids, attempted_uids = list(), set()

    def _generate_uid():
        if expert_pattern is None:
            # no pattern given: fall back to sequentially numbered uids
            return f"expert{UID_DELIMITER}{attempts_per_expert * num_experts - remaining_attempts}"

        uid = []
        for block in expert_pattern.split(UID_DELIMITER):
            try:
                if '[' not in block and ']' not in block:
                    uid.append(block)  # fixed block, use as-is
                elif block.startswith('[') and block.endswith(']') and ':' in block:
                    slice_start, slice_end = map(int, block[1:-1].split(':'))
                    uid.append(str(random.randint(slice_start, slice_end - 1)))
                else:
                    raise ValueError("Block must be either fixed or a range [from:to]")
            except KeyboardInterrupt:
                raise
            except Exception as e:
                raise ValueError(f"Expert pattern {expert_pattern} has invalid block {block}, {e}")
        return UID_DELIMITER.join(uid)

    while remaining_attempts > 0 and len(found_uids) < num_experts:
        # 1. sample new expert uids at random, skipping any we already tried
        new_uids = []
        while len(new_uids) + len(found_uids) < num_experts and remaining_attempts > 0:
            new_uid = _generate_uid()
            remaining_attempts -= 1
            if new_uid not in attempted_uids:
                attempted_uids.add(new_uid)
                new_uids.append(new_uid)

        # 2. look into DHT (if given) and remove uids already occupied by other peers
        if dht:
            existing_expert_uids = {found_expert.uid for found_expert in hivemind.get_experts(dht, new_uids)
                                    if found_expert is not None}
            new_uids = [new_uid for new_uid in new_uids if new_uid not in existing_expert_uids]

        found_uids += new_uids

    if len(found_uids) != num_experts:
        logger.warning(f"Found only {len(found_uids)} out of {num_experts} free expert uids after "
                       f"{attempts_per_expert * num_experts} attempts")
    return found_uids
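
# Usage sketch (hypothetical helper, mirrors the docstring above): sample 4 unique uids from
# a two-dimensional pattern. Without a `dht` instance, uniqueness is only checked locally.
def _example_generate_uids() -> List[str]:
    # returns e.g. ['ffn.17.203', 'ffn.2.998', ...]; each [a:b] block is sampled uniformly from [a, b)
    return generate_uids_from_pattern(num_experts=4, expert_pattern="ffn.[0:32].[0:1024]")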