def run(progress_q, results_q, threadid, adhoc_filename, episodes,
        results_folder, world_size):
    """Run one episode with a previously saved ad hoc agent and queue its metrics.

    The global ``random`` and ``numpy`` generators, plus a private
    ``random.Random`` used for the initial state, are all seeded with
    ``100 + threadid``. The transition function always receives a fresh
    ``random.Random(100)``.

    Args:
        progress_q: queue that receives a single ``1`` once the episode ends.
        results_q: queue that receives four ``(path, value)`` tuples.
        threadid: worker index, used only as a seed offset.
        adhoc_filename: passed to ``AdhocAgent.load``.
        episodes: used only to label the output paths.
        results_folder: path-like object joined with ``/`` to build the labels.
        world_size: forwarded to the transition/reward/state factories.
    """
    seed = 100 + threadid
    rng = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    team_size = 4
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(idx) for idx in range(team_size - 1)]
    agents.append(adhoc)

    transition_f = get_transition_function(team_size, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(team_size, world_size)
    start_state = PursuitState.random_state(team_size, world_size, rng)
    world = World(start_state, agents, transition_f, reward_f)

    timesteps, _ = world.run(0, 500)
    progress_q.put(1)

    def publish(template, value):
        # Each result travels as (destination path string, value).
        destination = str(results_folder / template.format(episodes))
        results_q.put((destination, value))

    publish('results_eps{}', timesteps)
    publish('eaccuracy_eps{}', np.average(adhoc.e_model.metric))
    publish('baccuracy_eps{}', np.average(adhoc.b_model.metric))
    publish('eaccuracyprey_eps{}', np.average(adhoc.e_model.metric_prey))
def __call__(self, state_node):
    """Evaluate ``state_node`` with the parent policy and log a greedy baseline.

    The parent's ``__call__`` result is returned unchanged. Alongside it, a
    world of four ``GreedyAgent`` instances is run from ``state_node.state``
    and its reward is recorded. Both values are appended to the enclosing
    scope's ``rollouts`` / ``rewards`` containers under the key ``self.k``.
    """
    rollout_value = super().__call__(state_node)

    start_state = state_node.state
    greedy_team = [GreedyAgent(idx) for idx in range(4)]
    greedy_world = World(start_state,
                         greedy_team,
                         get_transition_function(4, world_size),
                         get_reward_function(4, world_size))
    _, greedy_reward = greedy_world.run(0, 1000)

    rollouts[self.k].append(rollout_value)
    rewards[self.k].append(greedy_reward)
    return rollout_value
def run(q, threadid):
    """Run ``episodes`` episodes for one worker and save per-episode metrics.

    All generators are seeded with ``100 + threadid``, except the transition
    function, which gets ``random.Random(100)``. After every episode a ``1``
    is put on ``q``. The collected arrays are written with ``np.save`` in a
    ``finally`` clause, so partial results are still saved if an episode
    raises.

    Args:
        q: progress queue.
        threadid: worker index; seeds the generators and labels output files.
    """
    seed = 100 + threadid
    rng = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    team_size = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes - 1, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(idx) for idx in range(team_size - 1)]
    agents.append(adhoc)

    transition_f = get_transition_function(team_size, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(team_size, world_size)
    start_state = PursuitState.random_state(team_size, world_size, rng)
    world = World(start_state, agents, transition_f, reward_f)

    episode_lengths = []
    b_accuracy = []
    e_accuracy = []
    e_accuracy_prey = []

    def recent_mean(history, window):
        # Mean of the last `window` entries, i.e. this episode's steps.
        return sum(history[-window:]) / window

    try:
        for _ in range(episodes):
            world.initial_state = PursuitState.random_state(
                team_size, world_size, rng)
            steps, _ = world.run(0, 200)
            episode_lengths.append(steps)

            # Guard against a zero-length episode before dividing.
            window = max(1, steps)
            b_accuracy.append(recent_mean(adhoc.b_model.metric, window))
            e_accuracy.append(recent_mean(adhoc.e_model.metric, window))
            e_accuracy_prey.append(
                recent_mean(adhoc.e_model.metric_prey, window))
            q.put(1)
    finally:
        outputs = (
            ('results_{}', episode_lengths),
            ('eaccuracy_{}', e_accuracy),
            ('baccuracy_{}', b_accuracy),
            ('eaccuracyprey_{}', e_accuracy_prey),
        )
        for template, values in outputs:
            np.save(str(results_folder / template.format(threadid)),
                    np.array(values))
def init(episodes, world_q):
    """Build a world, run ``episodes`` warm-up episodes, and share the world.

    Every generator is seeded with 100. After the warm-up episodes, which are
    shown with a ``tqdm`` progress bar, the same ``world`` object is put on
    ``world_q`` once per thread (``n_threads`` times).

    Args:
        episodes: number of warm-up episodes; also passed to
            ``AdhocAfterNAgent``.
        world_q: queue that receives the world ``n_threads`` times.

    Returns:
        The ``(world, adhoc)`` pair.
    """
    rng = random.Random(100)
    random.seed(100)
    np.random.seed(100)

    team_size = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(idx) for idx in range(team_size - 1)]
    agents.append(adhoc)

    transition_f = get_transition_function(team_size, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(team_size, world_size)
    start_state = PursuitState.random_state(team_size, world_size, rng)
    world = World(start_state, agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(
            team_size, world_size, rng)
        world.run(0, 200)

    handed_out = 0
    while handed_out < n_threads:
        world_q.put(world)
        handed_out += 1

    return world, adhoc
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    """Run ``number_episodes`` episodes and pickle the recorded transitions.

    A ``TransitionRecorder`` is attached to the world as a visualizer. After
    all episodes have run, its ``transitions`` attribute is pickled to
    ``filename``.

    Args:
        filename: destination path, opened in binary write mode.
        number_episodes: how many episodes to run.
        agents: agents placed in the world; their count sets ``num_agents``.
        world_size: forwarded to the transition/reward/state factories.
        seed: seeds both the initial-state generator and the separate
            generator handed to the transition function.
    """
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f,
        visualizers=(transition_recorder, ))

    for _ in range(number_episodes):
        # A fresh random start per episode; the recorder persists across them.
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        world.run(0, 1000)

    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
tree = MCTS(tree_policy=UCB1(c=self.mcts_c), default_policy=RandomKStepRollOut2(self.mcts_k), backup=monte_carlo) self.prev_action = tree(self.root, n=n) # print([[y.n for y in x.children.values()] for x in self.root.children.values()]) return self.prev_action for k in (10, 100, 1000): for n in (1000, ): for c in (100, ): agents = [GreedyAgent(i) for i in range(4)] random.seed(100) agents[-1] = MCTSAgent(3, n, k, c * k) results = [] for i in range(1): world = World( PursuitState.random_state(len(agents), world_size), agents, get_transition_function(len(agents), world_size), get_reward_function(len(agents), world_size)) timesteps, reward = world.run(0, 1000) results.append(timesteps) print("k: " + str(k)) print("n: " + str(n)) print("c: " + str(c)) print("avg: " + str(sum(results) / len(results))) print(rollouts) print(rewards)
# Scripted scenario: a team made only of TeammateAwareAgent instances, run
# from a fixed initial state with a Pygame visualizer attached.
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
# Passed as the `prey_moves` keyword below; presumably limits the prey to
# these three displacements -- confirm against get_transition_function.
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents, world_size, prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)
# One RGB triple per agent, handed to the visualizer.
agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200, 200, agent_colors=agent_colors, agents=agents)
visualizers = (visualizer, )
# Fixed start: four coordinate pairs followed by a single (0, 0) pair
# (presumably agent positions, then prey positions -- TODO confirm).
initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ), world_size)
world = World(initial_state, agents, transition_f, reward_f, visualizers=visualizers)
# Prints whatever world.run returns for this scenario.
print(world.run(1, 100))
# expected actions
# RIGHT LEFT UP DOWN NOOP
# 4, 2, 2, 4 DOWN LEFT LEFT DOWN
# 4, 2, 2, 1 DOWN LEFT LEFT RIGH
# 4, 3, 2, 1 DOWN UUUP LEFT RIGH
# 1, 3, 2, 3 RIGH UUUP LEFT UUUP
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
agent_colors=agent_colors, agents=agents) visualizers = (visualizer, ) for i in range(iters): transition_f = get_transition_function(num_agents, world_size, random.Random(100 + i)) reward_f = get_reward_function(num_agents, world_size) world = World( PursuitState.random_state(num_agents, world_size, random.Random(100 + i)), agents, transition_f, reward_f, ) timesteps, reward = world.run(0., 5000) results.append(timesteps) print(timesteps) plt.plot(results) plt.plot([np.average(results[:i]) for i in range(1, len(results))], label='average') plt.show() # print(results) # print(world_size) # print(k) print(np.average(results)) # print(np.std(results)) print( st.t.interval( 0.9, len(results) - 1, loc=np.mean(results), scale=st.sem(results)) -