Python World.run Exemples, common.world.World.run Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : adhoc_using_dataset.py Projet : goncalo-rodrigues/thesis

def run(progress_q, results_q, threadid, adhoc_filename, episodes,
        results_folder, world_size):
    """Run a single pursuit episode with a stored ad hoc agent and report it.

    Three ``agent_type`` teammates and the ad hoc agent loaded from
    ``adhoc_filename`` are placed in a randomly generated ``PursuitState``
    and ``World.run`` is invoked once.  When it returns, ``1`` is pushed to
    ``progress_q`` and four ``(path, value)`` tuples are pushed to
    ``results_q``, in this order: the number of timesteps, the average of
    ``adhoc.e_model.metric``, the average of ``adhoc.b_model.metric`` and the
    average of ``adhoc.e_model.metric_prey``.

    Args:
        progress_q: Object with a ``put`` method; receives ``1`` after the run.
        results_q: Object with a ``put`` method; receives the result tuples.
        threadid: Added to the base seed 100 used for ``random``,
            ``numpy.random`` and the initial-state generator.
        adhoc_filename: Forwarded to ``AdhocAgent.load``.
        episodes: Only used to build the result path names.
        results_folder: Must support ``/`` with a string
            (presumably a ``pathlib.Path`` -- confirm against the caller).
        world_size: Forwarded to the transition/reward factories and to
            ``PursuitState.random_state``.
    """
    seed = 100 + threadid
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    # The ad hoc agent is restored from disk rather than constructed here.
    adhoc = AdhocAgent.load(adhoc_filename)
    teammates = [agent_type(index) for index in range(num_agents - 1)]
    agents = teammates + [adhoc]
    # The transition function gets its own generator, always seeded with 100
    # regardless of ``threadid``.
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    start_state = PursuitState.random_state(num_agents, world_size,
                                            random_instance)
    world = World(start_state, agents, transition_f, reward_f)
    timesteps, _ = world.run(0, 500)
    progress_q.put(1)

    def publish(name_template, value):
        # Result paths are built as <results_folder>/<template % episodes>.
        target = results_folder / name_template.format(episodes)
        results_q.put((str(target), value))

    publish('results_eps{}', timesteps)
    publish('eaccuracy_eps{}', np.average(adhoc.e_model.metric))
    publish('baccuracy_eps{}', np.average(adhoc.b_model.metric))
    publish('eaccuracyprey_eps{}', np.average(adhoc.e_model.metric_prey))

Exemple #2

0

Afficher le fichier

 def __call__(self, state_node):
     """Delegate to the parent policy and log a greedy-team reference run.

     The parent's ``__call__`` result is returned unchanged.  In addition, a
     ``World`` with four ``GreedyAgent`` instances is started from
     ``state_node.state`` and run once; the parent's result and the reward
     of that run are appended to the module-level ``rollouts`` and
     ``rewards`` containers under the key ``self.k``.
     """
     team_size = 4
     rollout_value = super().__call__(state_node)

     start_state = state_node.state
     greedy_team = [GreedyAgent(index) for index in range(team_size)]
     simulation = World(start_state, greedy_team,
                        get_transition_function(team_size, world_size),
                        get_reward_function(team_size, world_size))
     # Only the reward of the reference run is recorded; the step count
     # returned alongside it is discarded.
     _, greedy_reward = simulation.run(0, 1000)

     rollouts[self.k].append(rollout_value)
     rewards[self.k].append(greedy_reward)
     return rollout_value

Exemple #3

0

Afficher le fichier

def run(q, threadid):
    """Run ``episodes`` pursuit episodes and save per-episode statistics.

    A team of three ``agent_type`` agents plus one ``AdhocAfterNAgent`` is
    simulated for ``episodes`` episodes (a module-level value), each starting
    from a fresh random ``PursuitState``.  After every episode the step count
    and the mean of the last ``timesteps`` entries of the ad hoc agent's
    behaviour/environment model metrics are recorded, and ``1`` is put on
    ``q``.  The collected series are written with ``np.save`` in a ``finally``
    clause, so partial results are saved even if an episode raises.

    Args:
        q: Object with a ``put`` method; receives ``1`` after each episode.
        threadid: Added to the base seed 100 and used in output file names.
    """
    seed = 100 + threadid
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    adhoc = AdhocAfterNAgent(
        agent_type(3), episodes - 1, 3,
        mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
        behavior_model_size=bsize, environment_model_size=esize)
    agents = [agent_type(index) for index in range(num_agents - 1)]
    agents.append(adhoc)
    # The transition function gets its own generator, always seeded with 100
    # regardless of ``threadid``.
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    first_state = PursuitState.random_state(num_agents, world_size,
                                            random_instance)
    world = World(first_state, agents, transition_f, reward_f)

    def recent_mean(series, window):
        # Mean over the trailing ``window`` entries, divided by ``window``
        # itself (not by the slice length).
        return sum(series[-window:]) / window

    results, bmodelmetric, emodelmetric, emodelmetric_prey = [], [], [], []
    try:
        for _ in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, _ = world.run(0, 200)
            results.append(timesteps)
            # Clamp to 1 so a zero-step episode does not divide by zero.
            window = max(1, timesteps)
            bmodelmetric.append(recent_mean(adhoc.b_model.metric, window))
            emodelmetric.append(recent_mean(adhoc.e_model.metric, window))
            emodelmetric_prey.append(
                recent_mean(adhoc.e_model.metric_prey, window))
            q.put(1)
    finally:
        outputs = (
            ('results_{}', results),
            ('eaccuracy_{}', emodelmetric),
            ('baccuracy_{}', bmodelmetric),
            ('eaccuracyprey_{}', emodelmetric_prey),
        )
        for name_template, series in outputs:
            target = results_folder / name_template.format(threadid)
            np.save(str(target), np.array(series))

Exemple #4

0

Afficher le fichier

def init(episodes, world_q):
    """Build a world, run ``episodes`` warm-up episodes and share the world.

    All random sources (``random``, ``numpy.random``, the initial-state
    generator and the transition-function generator) are seeded with 100.
    After the warm-up loop the same ``world`` object is put on ``world_q``
    ``n_threads`` times (``n_threads`` is a module-level value).

    Args:
        episodes: Number of warm-up episodes; also passed to
            ``AdhocAfterNAgent`` as its second positional argument.
        world_q: Object with a ``put`` method that receives the world.

    Returns:
        The ``(world, adhoc)`` pair.
    """
    seed = 100
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    adhoc = AdhocAfterNAgent(
        agent_type(3),
        episodes,
        3,
        mcts_c=mcts_c,
        mcts_k=mcts_k,
        mcts_n=mcts_n,
        behavior_model_size=bsize,
        environment_model_size=esize,
    )
    agents = [agent_type(index) for index in range(num_agents - 1)]
    agents.append(adhoc)
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)

    first_state = PursuitState.random_state(num_agents, world_size,
                                            random_instance)
    world = World(first_state, agents, transition_f, reward_f)

    # Warm-up: each episode starts from a fresh random state; tqdm shows
    # progress.
    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        world.run(0, 200)

    # One queue entry per worker.
    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc

Exemple #5

0

Afficher le fichier

Fichier : utils.py Projet : goncalo-rodrigues/thesis

def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    """Simulate episodes and pickle the recorded transitions to ``filename``.

    A ``World`` is built for the given ``agents`` with a ``TransitionRecorder``
    attached as its only visualizer.  ``number_episodes`` episodes are run,
    each from a fresh random ``PursuitState``, and afterwards
    ``transition_recorder.transitions`` is written to ``filename`` with
    ``pickle.dump``.

    Args:
        filename: Path of the output file; opened in binary write mode.
        number_episodes: How many episodes to simulate.
        agents: Sequence of agents; its length sets the number of agents.
        world_size: Forwarded to the transition/reward factories and to
            ``PursuitState.random_state``.
        seed: Seed for both the initial-state generator and the generator
            handed to the transition function.
    """
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f,
        visualizers=(transition_recorder, ))

    for _ in range(number_episodes):
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        world.run(0, 1000)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)

Exemple #6

0

Afficher le fichier

        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action


# Parameter sweep: k takes three values while n and c each have a single
# value.  For every combination a team of four GreedyAgents is built, the
# last one is replaced by an MCTSAgent, and one World.run is executed.
for k in (10, 100, 1000):
    for n in (1000, ):
        for c in (100, ):
            agents = [GreedyAgent(i) for i in range(4)]
            # Re-seed the global generator so every configuration starts
            # from the same random stream.
            random.seed(100)
            # NOTE(review): positional meaning of (3, n, k, c * k) is defined
            # by MCTSAgent, which is not visible here -- confirm.
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            # Only one episode per configuration, so "avg" below is the
            # step count of that single run.
            for i in range(1):
                world = World(
                    PursuitState.random_state(len(agents), world_size), agents,
                    get_transition_function(len(agents), world_size),
                    get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)

            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))

# `rollouts` and `rewards` are module-level containers defined outside this
# excerpt; they are dumped once after the whole sweep.
print(rollouts)
print(rewards)

Exemple #7

0

Afficher le fichier

# Visualised run of a team of TeammateAwareAgents from a fixed start state.
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
# Custom move set handed to the transition function via the `prey_moves`
# keyword (presumably restricts the prey to these displacements -- confirm).
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents,
                                       world_size,
                                       prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)
# One RGB colour per agent (four entries).
agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200,
                              200,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

# Hand-picked start state: a tuple of four positions, a one-element tuple
# with position (0, 0), and the world size (presumably agent positions
# followed by prey positions -- confirm against PursuitState).
initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ),
                             world_size)

world = World(initial_state,
              agents,
              transition_f,
              reward_f,
              visualizers=visualizers)
# Prints whatever World.run returns (unpacked elsewhere in this project as
# a (timesteps, reward) pair).
print(world.run(1, 100))

# expected actions
# RIGHT LEFT UP DOWN NOOP
# 4, 2, 2, 4 DOWN LEFT LEFT DOWN
# 4, 2, 2, 1 DOWN LEFT LEFT RIGH
# 4, 3, 2, 1 DOWN UUUP LEFT RIGH
# 1, 3, 2, 3 RIGH UUUP LEFT UUUP
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH

Exemple #8

0

Afficher le fichier

                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

# Run `iters` independent episodes.  Episode i seeds both the transition
# function and the initial state with 100 + i.  Note that the World below is
# created without the `visualizers` tuple defined above.
for i in range(iters):
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100 + i))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(
        PursuitState.random_state(num_agents, world_size,
                                  random.Random(100 + i)),
        agents,
        transition_f,
        reward_f,
    )
    timesteps, reward = world.run(0., 5000)
    results.append(timesteps)
    print(timesteps)

plt.plot(results)
# Running mean over the first 1..len(results) episodes.  The upper bound is
# len(results) + 1 so the final prefix (all episodes) is included and the
# curve has as many points as `results`.
plt.plot([np.average(results[:i]) for i in range(1, len(results) + 1)],
         label='average')
plt.show()
# print(results)
# print(world_size)
# print(k)
print(np.average(results))
# print(np.std(results))
print(
    st.t.interval(
        0.9, len(results) - 1, loc=np.mean(results), scale=st.sem(results)) -