def run(progress_q, results_q, threadid, adhoc_filename, episodes,
        results_folder, world_size):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    # load_run(dataset_folder / dataset_name, adhoc, episodes, fit=False, compute_metrics=False)
    # Restore a previously trained ad hoc agent from disk.
    adhoc = AdhocAgent.load(adhoc_filename)
    # agent_type is a module-level global: the policy class used for the three teammates.
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    timesteps, reward = world.run(0, 500)
    progress_q.put(1)  # signal completion of this run to the parent process

    # Report the episode length and the averaged model-accuracy metrics,
    # keyed by their output file path.
    results_q.put(
        (str(results_folder / 'results_eps{}'.format(episodes)), timesteps))
    results_q.put((str(results_folder / 'eaccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.e_model.metric)))
    results_q.put((str(results_folder / 'baccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.b_model.metric)))
    results_q.put(
        (str(results_folder / 'eaccuracyprey_eps{}'.format(episodes)),
         np.average(adhoc.e_model.metric_prey)))
Example #2
def __init__(self, state, adhoc_id, agent_type):
    super().__init__(state.agent_positions, state.prey_positions, state.world_size)
    self.adhoc_id = adhoc_id
    self.reward_fn = get_reward_function(len(state.agent_positions), state.world_size)
    self.transi_fn = get_transition_function(len(state.agent_positions), state.world_size)
    self.agents = [agent_type(i) for i in range(3)]
    self.agent_type = agent_type
Example #3
def __call__(self, state_node):
    result = super().__call__(state_node)
    # For reference, run the same state with a full GreedyAgent team and log
    # both the rollout value and the greedy team's reward.
    world = World(state_node.state, [GreedyAgent(i) for i in range(4)],
                  get_transition_function(4, world_size),
                  get_reward_function(4, world_size))
    ts, reward = world.run(0, 1000)
    rollouts[self.k].append(result)
    rewards[self.k].append(reward)
    return result
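The rollout policy above logs into module-level rollouts and rewards containers keyed by self.k (they are printed at the end of Example #9). A plausible initialization, assumed rather than shown in the original:

from collections import defaultdict

# Assumed module-level containers, keyed by the rollout depth k.
rollouts = defaultdict(list)
rewards = defaultdict(list)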
Example #4
def run(q, threadid):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3),
                             episodes - 1,
                             3,
                             mcts_c=mcts_c,
                             mcts_k=mcts_k,
                             mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            timesteps = max(1, timesteps)
            bmodelmetric.append(
                sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(
                sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(
                sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)
    finally:
        np.save(str(results_folder / 'results_{}'.format(threadid)),
                np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)),
                np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)),
                np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)),
                np.array(emodelmetric_prey))
Example #5
def __init__(self, agents, world_size=(5, 5), max_steps=1000):
    self.world_size = world_size
    self.agent = DummyAgent(3)
    initial_state = self._get_new_state()
    transition_f = get_transition_function(4, world_size)
    reward_f = get_reward_function(4, world_size)
    self.world = World(initial_state, agents + [self.agent], transition_f,
                       reward_f)
    self.reward_range = (-1, 0)
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Box(low=0,
                                        high=max(world_size),
                                        shape=(8, ))
    self.max_steps = max_steps
    self.i = 0
Example #6
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))

    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        _, _ = world.run(0, 1000)

    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
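A small usage sketch for save_run, reusing the GreedyAgent teammates seen in the other examples; the file name and episode count are placeholders:

agents = [GreedyAgent(i) for i in range(4)]
save_run('greedy_transitions.pkl', 50, agents, world_size=(5, 5), seed=100)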
Example #7
    def act(self, state):
        game_state = GameState(
            state.agent_positions, state.prey_positions, world_size, agents,
            get_reward_function(len(agents), world_size),
            get_transition_function(len(agents), world_size))
        # Reuse the subtree under the previous action when the observed state
        # was already expanded, and only run the remaining MCTS iterations.
        if self.root is not None and state in self.root.children[
                self.prev_action].children:
            self.root = self.root.children[self.prev_action].children[state]
            self.root.parent = None
            n = self.mcts_n - self.root.n
        else:
            self.root = StateNode(None, game_state)
            n = self.mcts_n
        # print(self.mcts_n)

        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action
Example #8
def init(episodes, world_q):
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)

    # Hand the warmed-up world to each worker (n_threads is a module-level global).
    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc
Example #9
        # Fragment: the tail of an act() method, as in Example #7.
        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action


# Sweep the rollout depth k (n and c fixed here), logging episode lengths per setting.
for k in (10, 100, 1000):
    for n in (1000, ):
        for c in (100, ):
            agents = [GreedyAgent(i) for i in range(4)]
            random.seed(100)
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            for i in range(1):
                world = World(
                    PursuitState.random_state(len(agents), world_size), agents,
                    get_transition_function(len(agents), world_size),
                    get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)

            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))

print(rollouts)
print(rewards)
Example #10
from common.world import World
from pursuit.agents.handcoded.teammate_aware import TeammateAwareAgent
from pursuit.reward import get_reward_function
from pursuit.state import PursuitState
from pursuit.transition import get_transition_function
from pursuit.visualizers.pygame_visualizer import PygameVisualizer

num_agents = 4
world_size = (5, 5)
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
# Restrict the prey to moving left, right, or staying in place.
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents,
                                       world_size,
                                       prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)
agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200,
                              200,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ),
                             world_size)

world = World(initial_state,
              agents,
              transition_f,
              reward_f,
              visualizers=visualizers)
print(world.run(1, 100))
Example #11
def __init__(self, state, behavior_model, environment_model, adhoc_id):
    super().__init__(state.agent_positions, state.prey_positions, state.world_size)
    self.behavior_model = behavior_model
    self.env_model = environment_model
    self.adhoc_id = adhoc_id
    self.reward_fn = get_reward_function(len(state.agent_positions), state.world_size)