def run(progress_q, results_q, threadid, adhoc_filename, episodes,
        results_folder, world_size):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    # load_run(dataset_folder / dataset_name, adhoc, episodes, fit=False, compute_metrics=False)
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
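    # run a single evaluation episode with the loaded ad hoc agent in the last agent slot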
    timesteps, reward = world.run(0, 500)
    progress_q.put(1)

    results_q.put(
        (str(results_folder / 'results_eps{}'.format(episodes)), timesteps))
    results_q.put((str(results_folder / 'eaccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.e_model.metric)))
    results_q.put((str(results_folder / 'baccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.b_model.metric)))
    results_q.put(
        (str(results_folder / 'eaccuracyprey_eps{}'.format(episodes)),
         np.average(adhoc.e_model.metric_prey)))
Example #2
    def __call__(self, state_node):
        result = super().__call__(state_node)
        world = World(state_node.state, [GreedyAgent(i) for i in range(4)],
                      get_transition_function(4, world_size),
                      get_reward_function(4, world_size))
        ts, reward = world.run(0, 1000)
        rollouts[self.k].append(result)
        rewards[self.k].append(reward)
        return result
Example #3
    def __init__(self):
        logging.info('Initialising vision')
        #required on DICE:
        #self.capture = MPlayerCapture(self.rawSize)
        self.capture = Capture(self.rawSize)

        world = World('blue') # arbitrary colour
        world.pointer = None

        self.threshold = vision.threshold.AltRaw()
        self.pre = Preprocessor(self.rawSize, self.threshold, None)
        self.gui = GUI(world, self.rawSize, self.threshold)
        logging.debug('Vision initialised')
Example #4
def run(q, threadid):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3),
                             episodes - 1,
                             3,
                             mcts_c=mcts_c,
                             mcts_k=mcts_k,
                             mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
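            # clamp to at least one step so the per-step metric averages below never divide by zero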
            timesteps = max(1, timesteps)
            bmodelmetric.append(
                sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(
                sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(
                sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)
    finally:
        np.save(str(results_folder / 'results_{}'.format(threadid)),
                np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)),
                np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)),
                np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)),
                np.array(emodelmetric_prey))
Example #5
    def __init__(self, agents, world_size=(5, 5), max_steps=1000):
        self.world_size = world_size
        self.agent = DummyAgent(3)
        initial_state = self._get_new_state()
        transition_f = get_transition_function(4, world_size)
        reward_f = get_reward_function(4, world_size)
        self.world = World(initial_state, agents + [self.agent], transition_f,
                           reward_f)
        self.reward_range = (-1, 0)
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=0,
                                            high=max(world_size),
                                            shape=(8, ))
        self.max_steps = max_steps
        self.i = 0
Example #6
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
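    # the recorder runs as a visualizer, capturing every transition for the pickle dump below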
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))

    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        _, _ = world.run(0, 1000)

    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
Example #7
def init(episodes, world_q):
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)

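    # hand the warmed-up world to each worker thread through the queue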
    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc
Example #8
        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action


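# sweep the MCTS settings: k rollout steps, n search iterations and an exploration constant of c * k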
for k in (10, 100, 1000):
    for n in (1000, ):
        for c in (100, ):
            agents = [GreedyAgent(i) for i in range(4)]
            random.seed(100)
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            for i in range(1):
                world = World(
                    PursuitState.random_state(len(agents), world_size), agents,
                    get_transition_function(len(agents), world_size),
                    get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)

            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))

print(rollouts)
print(rewards)
Example #9
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
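# restrict the prey to a fixed set of candidate moves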
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents,
                                       world_size,
                                       prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)
agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200,
                              200,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ),
                             world_size)

world = World(initial_state,
              agents,
              transition_f,
              reward_f,
              visualizers=visualizers)
print(world.run(1, 100))

# expected actions
# RIGHT LEFT UP DOWN NOOP
# 4, 2, 2, 4 DOWN LEFT LEFT DOWN
# 4, 2, 2, 1 DOWN LEFT LEFT RIGH
# 4, 3, 2, 1 DOWN UUUP LEFT RIGH
# 1, 3, 2, 3 RIGH UUUP LEFT UUUP
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
Example #10
adhoc = AdhocAgent(3,
                   mcts_c=mcts_c,
                   mcts_k=mcts_k,
                   mcts_n=mcts_n,
                   behavior_model_size=bsize,
                   environment_model_size=esize,
                   eps=1.0,
                   fit=None)
# adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200')
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
transition_f = get_transition_function(num_agents, world_size,
                                       random.Random(100))
reward_f = get_reward_function(num_agents, world_size)

world = World(
    PursuitState.random_state(num_agents, world_size, random_instance), agents,
    transition_f, reward_f)
save_episodes = (1, 5, 10, 20, 50, 100, 150, 200)
current_episode = 0
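# run the episodes in consecutive blocks, one block per checkpoint in save_episodes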
for episodes in save_episodes:
    for current_episode in range(current_episode, episodes):

        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 100)
        print(timesteps)

        print("acc average " + str(np.average(adhoc.e_model.metric)))
        print("acc prey average " + str(np.average(adhoc.e_model.metric_prey)))
        print("behavior average " + str(np.average(adhoc.b_model.metric)))
Example #11
agent_colors = [(random.randint(0, 255), random.randint(0, 50),
                 random.randint(0, 255)) for _ in range(num_agents)]
visualizer = PygameVisualizer(400,
                              400,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

for i in range(iters):
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100 + i))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(
        PursuitState.random_state(num_agents, world_size,
                                  random.Random(100 + i)),
        agents,
        transition_f,
        reward_f,
    )
    timesteps, reward = world.run(0., 5000)
    results.append(timesteps)
    print(timesteps)

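# plot each run's episode length (timesteps) together with the running average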
plt.plot(results)
plt.plot([np.average(results[:i]) for i in range(1, len(results))],
         label='average')
plt.show()
# print(results)
# print(world_size)
# print(k)
print(np.average(results))
Example #12
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from vision2.vision import Vision
from common.world import World
import sys

import logging
#logging.basicConfig(level=logging.DEBUG)

args = len(sys.argv)
if args < 1:
    print "Usage: vision.py [filename]"
    sys.exit(2)

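# with no filename argument, run vision on the live capture; otherwise feed it the given file(s)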
world = World()
if args == 1:
    v = Vision(world)
elif args > 1:
    files = sys.argv[1:]
    v = Vision(world, files)

v.run()