def run(q, threadid):
    """Run one experiment thread of the pursuit benchmark.

    Plays ``episodes`` episodes with three ``agent_type`` teammates plus one
    ad hoc agent, pushes a progress token onto ``q`` after every episode, and
    always persists the collected metrics to ``results_folder`` on exit —
    even if an episode raises.

    Relies on module-level globals: ``episodes``, ``world_size``, ``mcts_c``,
    ``mcts_k``, ``mcts_n``, ``bsize``, ``esize``, ``agent_type``,
    ``results_folder``.

    :param q: queue used to signal per-episode progress to the parent.
    :param threadid: integer id; offsets the RNG seeds and names the outputs.
    """
    # Deterministic per-thread seeding so each worker is reproducible.
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)
    num_agents = 4
    # Ad hoc agent acts like agent_type(3) until episode `episodes - 1`,
    # i.e. it switches to ad hoc behavior only on the final episode.
    adhoc = AdhocAfterNAgent(agent_type(3), episodes - 1, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    # Fixed seed (100) for the transition function so every thread shares
    # the same world dynamics; only episode starts differ per thread.
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)

    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for _ in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            # Guard against division by zero if an episode ends immediately.
            timesteps = max(1, timesteps)
            # Average each model's accuracy metric over this episode's steps.
            bmodelmetric.append(
                sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(
                sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(
                sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)  # progress signal for the parent process
    finally:
        # Persist whatever was collected, even if the loop raised.
        for name, data in (('results', results),
                           ('eaccuracy', emodelmetric),
                           ('baccuracy', bmodelmetric),
                           ('eaccuracyprey', emodelmetric_prey)):
            np.save(str(results_folder / '{}_{}'.format(name, threadid)),
                    np.array(data))
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    """Play ``number_episodes`` pursuit episodes and pickle the transitions.

    Every transition observed by the world is captured by a
    ``TransitionRecorder`` visualizer and dumped to ``filename``.

    :param filename: path of the pickle file to write.
    :param number_episodes: number of episodes to play.
    :param agents: list of agents; its length fixes the agent count.
    :param world_size: (width, height) of the grid world.
    :param seed: seed for both the start-state RNG and the transition RNG.
    """
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size,
                                            random_instance),
                  agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))
    for _ in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size,
                                                        random_instance)
        world.run(0, 1000)
    # Context manager guarantees the file is closed even if pickling fails.
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
def init(episodes, world_q):
    """Warm up a pursuit world and distribute it to the worker threads.

    Builds a four-agent team (three ``agent_type`` agents plus one
    ``AdhocAfterNAgent``), plays ``episodes`` training episodes with a
    progress bar, then puts the very same world object onto ``world_q``
    once per thread in ``n_threads``.

    Uses module-level globals: ``mcts_c``, ``mcts_k``, ``mcts_n``,
    ``bsize``, ``esize``, ``agent_type``, ``world_size``, ``n_threads``.

    :returns: the warmed-up ``(world, adhoc)`` pair.
    """
    seed = 100
    rng = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)
    n_agents = 4
    adhoc = AdhocAfterNAgent(
        agent_type(3), episodes, 3,
        mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
        behavior_model_size=bsize, environment_model_size=esize)
    # Three plain teammates, ad hoc agent last.
    team = [agent_type(idx) for idx in range(n_agents - 1)]
    team.append(adhoc)
    transitions = get_transition_function(n_agents, world_size,
                                          random.Random(seed))
    rewards = get_reward_function(n_agents, world_size)
    world = World(PursuitState.random_state(n_agents, world_size, rng),
                  team, transitions, rewards)
    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(n_agents, world_size,
                                                        rng)
        world.run(0, 200)
    # Hand the same world instance to every worker thread.
    for _ in range(n_threads):
        world_q.put(world)
    return world, adhoc
mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize, environment_model_size=esize, eps=1.0, fit=None) # adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200') agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc] transition_f = get_transition_function(num_agents, world_size, random.Random(100)) reward_f = get_reward_function(num_agents, world_size) world = World( PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f) save_episodes = (1, 5, 10, 20, 50, 100, 150, 200) current_episode = 0 for episodes in save_episodes: for current_episode in range(current_episode, episodes): world.initial_state = PursuitState.random_state( num_agents, world_size, random_instance) timesteps, reward = world.run(0, 100) print(timesteps) print("acc average " + str(np.average(adhoc.e_model.metric))) print("acc prey average " + str(np.average(adhoc.e_model.metric_prey))) print("behavior average " + str(np.average(adhoc.b_model.metric))) adhoc.save(str(folder / ('10x10greedy_random_epochs5_' + str(episodes))))