def run(progress_q, results_q, threadid, adhoc_filename, episodes, results_folder, world_size):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)
    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    # load_run(dataset_folder / dataset_name, adhoc, episodes, fit=False, compute_metrics=False)
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)
    timesteps, reward = world.run(0, 500)
    progress_q.put(1)
    results_q.put((str(results_folder / 'results_eps{}'.format(episodes)), timesteps))
    results_q.put((str(results_folder / 'eaccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.e_model.metric)))
    results_q.put((str(results_folder / 'baccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.b_model.metric)))
    results_q.put((str(results_folder / 'eaccuracyprey_eps{}'.format(episodes)),
                   np.average(adhoc.e_model.metric_prey)))
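
# --- Hypothetical driver sketch (not part of the original files) ---
# A minimal example of how the run() worker above could be launched in
# parallel. The multiprocessing wiring, worker count, agent filename and
# output folder below are illustrative assumptions only.
import multiprocessing as mp
from pathlib import Path

if __name__ == '__main__':
    progress_q, results_q = mp.Queue(), mp.Queue()
    workers = [mp.Process(target=run,
                          args=(progress_q, results_q, tid, 'adhoc.agent',
                                100, Path('results'), (5, 5)))
               for tid in range(4)]
    for w in workers:
        w.start()
    # each worker pushes four (filename, value) pairs into results_q
    for _ in range(4 * len(workers)):
        name, value = results_q.get()
        print(name, value)
    for w in workers:
        w.join()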
def __init__(self, state, adhoc_id, agent_type):
    super().__init__(state.agent_positions, state.prey_positions, state.world_size)
    self.adhoc_id = adhoc_id
    self.reward_fn = get_reward_function(len(state.agent_positions), state.world_size)
    self.transi_fn = get_transition_function(len(state.agent_positions), state.world_size)
    self.agents = [agent_type(i) for i in range(3)]
    self.agent_type = agent_type
def __call__(self, state_node):
    result = super().__call__(state_node)
    world = World(state_node.state,
                  [GreedyAgent(i) for i in range(4)],
                  get_transition_function(4, world_size),
                  get_reward_function(4, world_size))
    ts, reward = world.run(0, 1000)
    # record the rollout value and the full-game reward in the module-level
    # collections keyed by the rollout depth k
    rollouts[self.k].append(result)
    rewards[self.k].append(reward)
    return result
def run(q, threadid):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)
    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3), episodes - 1, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize, environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)
    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            timesteps = max(1, timesteps)
            bmodelmetric.append(sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)
    finally:
        np.save(str(results_folder / 'results_{}'.format(threadid)), np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)), np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)), np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)), np.array(emodelmetric_prey))
def __init__(self, agents, world_size=(5, 5), max_steps=1000):
    self.world_size = world_size
    self.agent = DummyAgent(3)
    initial_state = self._get_new_state()
    transition_f = get_transition_function(4, world_size)
    reward_f = get_reward_function(4, world_size)
    self.world = World(initial_state, agents + [self.agent], transition_f, reward_f)
    self.reward_range = (-1, 0)
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Box(low=0, high=max(world_size), shape=(8,))
    self.max_steps = max_steps
    self.i = 0
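
# --- Hypothetical usage sketch (assumptions, not shown in the original) ---
# The constructor above sets up a Gym-style environment. Assuming the class
# is named PursuitEnv (a hypothetical name) and exposes the standard Gym
# reset()/step() interface, a random rollout for the learned agent's slot
# could look like this; the teammate choice is also an assumption.
env = PursuitEnv([GreedyAgent(i) for i in range(3)], world_size=(5, 5))
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # random action for the ad hoc slot
    obs, reward, done, info = env.step(action)  # classic Gym 4-tuple step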
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f,
                  visualizers=(transition_recorder,))
    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        _, _ = world.run(0, 1000)
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
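
# --- Hypothetical read-back sketch (not part of the original file) ---
# save_run() pickles the recorded transitions to disk; loading them again
# only needs the standard pickle API. The filename below is an assumption.
import pickle

with open('dataset.transitions', 'rb') as input_file:
    transitions = pickle.load(input_file)
print(len(transitions))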
def act(self, state):
    game_state = GameState(state.agent_positions, state.prey_positions, world_size, agents,
                           get_reward_function(len(agents), world_size),
                           get_transition_function(len(agents), world_size))
    if self.root is not None and state in self.root.children[self.prev_action].children:
        # reuse the MCTS subtree rooted at the observed successor state
        self.root = self.root.children[self.prev_action].children[state]
        self.root.parent = None
        n = self.mcts_n - self.root.n
    else:
        self.root = StateNode(None, game_state)
        n = self.mcts_n
    # print(self.mcts_n)
    tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                default_policy=RandomKStepRollOut2(self.mcts_k),
                backup=monte_carlo)
    self.prev_action = tree(self.root, n=n)
    # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
    return self.prev_action
def init(episodes, world_q):
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)
    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize, environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)
    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)
    for _ in range(n_threads):
        world_q.put(world)
    return world, adhoc
    tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                default_policy=RandomKStepRollOut2(self.mcts_k),
                backup=monte_carlo)
    self.prev_action = tree(self.root, n=n)
    # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
    return self.prev_action


for k in (10, 100, 1000):
    for n in (1000,):
        for c in (100,):
            agents = [GreedyAgent(i) for i in range(4)]
            random.seed(100)
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            for i in range(1):
                world = World(PursuitState.random_state(len(agents), world_size),
                              agents,
                              get_transition_function(len(agents), world_size),
                              get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)
            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))

print(rollouts)
print(rewards)
from common.world import World
from pursuit.agents.handcoded.teammate_aware import TeammateAwareAgent
from pursuit.reward import get_reward_function
from pursuit.state import PursuitState
from pursuit.transition import get_transition_function
from pursuit.visualizers.pygame_visualizer import PygameVisualizer

num_agents = 4
world_size = (5, 5)
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents, world_size, prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)

agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200, 200, agent_colors=agent_colors, agents=agents)
visualizers = (visualizer,)

initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0),), world_size)
world = World(initial_state, agents, transition_f, reward_f, visualizers=visualizers)
print(world.run(1, 100))
def __init__(self, state, behavior_model, environment_model, adhoc_id):
    super().__init__(state.agent_positions, state.prey_positions, state.world_size)
    self.behavior_model = behavior_model
    self.env_model = environment_model
    self.adhoc_id = adhoc_id
    self.reward_fn = get_reward_function(len(state.agent_positions), state.world_size)