Example #1
def run(q, threadid):
    # Seed a per-worker RNG (and the global RNGs) so each thread's run
    # is reproducible but distinct.
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3),
                             episodes - 1,
                             3,
                             mcts_c=mcts_c,
                             mcts_k=mcts_k,
                             mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            timesteps = max(1, timesteps)  # avoid dividing by zero below
            # Average each model's accuracy metric over the episode just played.
            bmodelmetric.append(
                sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(
                sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(
                sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)  # signal one finished episode to the parent
    finally:
        # Persist whatever was collected, even if the run was interrupted.
        np.save(str(results_folder / 'results_{}'.format(threadid)),
                np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)),
                np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)),
                np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)),
                np.array(emodelmetric_prey))
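
A minimal driver sketch for run, assuming the module-level names the function already relies on (n_threads, episodes, and so on); this is illustrative, not the repository's own launcher:

import multiprocessing

if __name__ == '__main__':
    q = multiprocessing.Queue()
    workers = [multiprocessing.Process(target=run, args=(q, tid))
               for tid in range(n_threads)]
    for w in workers:
        w.start()
    for _ in range(n_threads * episodes):
        q.get()  # run() puts one token per finished episode
    for w in workers:
        w.join()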
Example #2
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))

    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 1000)  # return values unused; the recorder captures the transitions

    # Pickle the recorded transitions for later offline use.
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
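
A hypothetical usage, with GreedyAgent standing in for whatever teammate class the project provides:

# Record 1000 episodes of four agents on the default 5x5 world.
save_run('transitions_5x5.pkl', 1000, [GreedyAgent(i) for i in range(4)])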
Example #3
def init(episodes, world_q):
    # Fixed seeds: the warm-up phase should be identical across runs.
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f)

    # Warm-up: run the requested number of episodes so the ad hoc agent's
    # models are trained before the workers take over.
    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)

    # Hand the warmed-up world to each worker thread.
    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc

random_instance = random.Random(100)
random.seed(100)
np.random.seed(100)

num_agents = 4
adhoc = AdhocAgent(3,
                   mcts_c=mcts_c,
                   mcts_k=mcts_k,
                   mcts_n=mcts_n,
                   behavior_model_size=bsize,
                   environment_model_size=esize,
                   eps=1.0,
                   fit=None)
# adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200')
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
transition_f = get_transition_function(num_agents, world_size,
                                       random.Random(100))
reward_f = get_reward_function(num_agents, world_size)

world = World(
    PursuitState.random_state(num_agents, world_size, random_instance), agents,
    transition_f, reward_f)
# Snapshot the ad hoc agent after increasing amounts of experience.
save_episodes = (1, 5, 10, 20, 50, 100, 150, 200)
current_episode = 0
for episodes in save_episodes:
    # Resume from wherever the previous checkpoint left off.
    for current_episode in range(current_episode, episodes):

        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 100)
        print(timesteps)

        print("acc average " + str(np.average(adhoc.e_model.metric)))
        print("acc prey average " + str(np.average(adhoc.e_model.metric_prey)))
        print("behavior average " + str(np.average(adhoc.b_model.metric)))

    adhoc.save(str(folder / ('10x10greedy_random_epochs5_' + str(episodes))))
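
A snapshot written this way can be restored with AdhocAgent.load, as the commented-out line above suggests; a minimal sketch reusing the names defined earlier:

# Restore the final snapshot and rebuild the world around it.
adhoc = AdhocAgent.load(str(folder / '10x10greedy_random_epochs5_200'))
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
world = World(PursuitState.random_state(num_agents, world_size, random_instance),
              agents, transition_f, reward_f)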