from collections import deque

# EnvironmentSelector, World, shift_list and serialize come from the
# surrounding project; only the standard-library import is shown here.


def evaluate(agent_profile, agent_new_path, agent_old_path,
             games_num, experience_path=None, acceptance_rate=0.6,
             verbose=True, debug=False, max_steps=None, self_play_examples_deque=deque([])):
    print("Evaluating model with games_num %d and acceptance_rate %f" % (games_num, acceptance_rate))

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(agent_profile)
    agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(agent_profile)
    game = env_selector.get_game(agent_profile.game)

    agents = []

    # every seat starts with a copy of the old model...
    for idx in range(game.get_players_num()):
        old_agent = agent.clone()
        old_agent.load(agent_old_path)
        agents.append(old_agent)

    # ...then the candidate (new) model takes seat 0
    agent.load(agent_new_path)

    agents[0] = agent

    arena_games_results = [0] * len(agents)
    arena_examples = []
    arena_games_n = games_num // game.get_players_num()

    world = World()

    for jdx in range(game.get_players_num()):
        # rotate the seat order so the new agent plays every position once
        playing_agents = shift_list(agents, jdx)

        sess_arena_examples, games_results = world.execute_games(playing_agents,
                                                                 game,
                                                                 arena_games_n,
                                                                 max_game_steps_n=max_steps,
                                                                 verbose=verbose,
                                                                 show_every_turn=debug)

        games_results = shift_list(games_results, -jdx)  # undo the rotation so index 0 is always the new agent

        for index in range(len(arena_games_results)):
            arena_games_results[index] += games_results[index]

        arena_examples.extend(sess_arena_examples)

    self_play_examples_deque += arena_examples

    if experience_path:
        serialize(self_play_examples_deque, experience_path)

    cur_rewards = arena_games_results[0]
    other_rewards = sum(arena_games_results) - cur_rewards

    print("Current agent got rewards: %d\n"
          "Total reward across all other agents: %d" % (cur_rewards, other_rewards))

    # accept the new model only if its share of the total reward reaches acceptance_rate
    total_rewards = cur_rewards + other_rewards
    updated = total_rewards > 0 and cur_rewards / total_rewards >= acceptance_rate

    return updated
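
A hedged usage sketch for the function above; the profile name and checkpoint paths below are placeholders, not values from the original project:

# Hypothetical call; the profile name and paths are placeholders.
accepted = evaluate("connect4_agent",
                    agent_new_path="checkpoints/candidate.h5",
                    agent_old_path="checkpoints/best.h5",
                    games_num=40,
                    experience_path="experience/arena.pkl",
                    acceptance_rate=0.6)
if accepted:
    print("Candidate model accepted as the new best model")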
Example 2
    def reset(self):
        self.steps_done = 0
        self.terminal = self.steps
        self.last_pick = [-1] * self.world.n_cities
        # rebuild the World so every episode starts from the same initial state
        self.world = World(self.n_agents, self.n_cities, self.steps, self.conn,
                           self.tasks, self.cities, self.rewards,
                           self.destinations, self.budget)
        self.remain_budget = self.get_budget()
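
For context, a hedged sketch of how this reset is typically driven; env, n_episodes and run_episode are placeholders for whatever the surrounding project actually provides:

# Hypothetical driver loop; `env` is an instance of the class this method
# belongs to, and run_episode stands in for the project's rollout logic.
for episode in range(n_episodes):
    env.reset()          # rebuilds the World, budget and pick bookkeeping
    run_episode(env)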
Example 3
def generate_self_play(opt_agent_profile,
                       agent_path,
                       games_num,
                       experience_path,
                       max_steps,
                       verbose,
                       debug,
                       exploration_decay_steps,
                       optimize_for_inference=False,
                       self_play_examples_deque=deque([])):
    world = World()

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(opt_agent_profile)

    agent.load(agent_path)

    agent_profile = env_selector.get_profile(opt_agent_profile)
    game = env_selector.get_game(agent_profile.game)

    if optimize_for_inference:
        # strip training-only functionality so self-play inference runs faster
        agent.disable_training_capability()

    self_play_examples = world.generate_self_play(
        agent,
        game,
        games_num,
        max_game_steps_n=max_steps,
        verbose=verbose,
        show_every_turn=debug,
        exploration_decay_steps=exploration_decay_steps)

    self_play_examples_deque += self_play_examples

    serialize(self_play_examples_deque, experience_path)
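
A hedged usage sketch; the profile name and paths are again placeholders:

# Hypothetical call; every argument value here is a placeholder.
generate_self_play("connect4_agent",
                   agent_path="checkpoints/best.h5",
                   games_num=100,
                   experience_path="experience/self_play.pkl",
                   max_steps=None,
                   verbose=True,
                   debug=False,
                   exploration_decay_steps=None,
                   optimize_for_inference=True)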
Example 4
    def __init__(self, **kwargs):
        self.steps_done = 0

        self.n_agents = kwargs["n_agents"]
        self.n_cities = kwargs["n_cities"]
        self.steps = kwargs["steps"]
        self.conn = kwargs["conn"]
        self.tasks = kwargs["tasks"]
        self.cities = kwargs["cities"]
        self.rewards = kwargs["rewards"]
        self.destinations = kwargs["destinations"]
        self.budget = kwargs["budget"]

        self.world = World(self.n_agents, self.n_cities, self.steps, self.conn,
                           self.tasks, self.cities, self.rewards,
                           self.destinations, self.budget)
        self.remain_budget = self.get_budget()
        self.terminal = self.steps
        self.last_pick = [-1] * self.world.n_cities  # -1 marks a city that has not been picked yet
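
A hedged construction sketch for the environment above; the class name TaskEnv and every value are placeholders, chosen only to show which keyword arguments the constructor expects:

# Hypothetical instantiation; TaskEnv and all values below are placeholders.
env = TaskEnv(n_agents=2,
              n_cities=5,
              steps=20,
              conn=conn_matrix,          # city connectivity (stub)
              tasks=tasks,               # task list (stub)
              cities=cities,             # city descriptors (stub)
              rewards=rewards,           # per-task rewards (stub)
              destinations=destinations, # task destinations (stub)
              budget=10)
env.reset()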
Example 5

    if not options.random_agent:
        parser.error('Random agent profile must be selected')

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(options.agent)
    print("Pit with agent ", agent.name)
    agent.set_exploration_enabled(False)

    random_agent = env_selector.get_agent(options.random_agent)
    print("Pit with agent ", random_agent.name)
    random_agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(options.agent)
    game = env_selector.get_game(agent_profile.game)

    world = World()

    agents = [agent, random_agent]

    model_files = [f for f in listdir(options.folder) if isfile(join(options.folder, f)) and f.endswith('.h5')]

    row_list = []

    print_progress_bar(0, len(model_files), prefix='Progress:', suffix='Complete')
    for idx, model in enumerate(model_files):
        model_path = join(str(options.folder), model)
        print(model_path)
        agent.load(model_path)

        result = 0
        games_won = 0
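
The excerpt stops right after the counters are initialized; a hedged sketch of how the loop might continue, reusing the World.execute_games signature from the other examples (the options fields and the result bookkeeping are assumptions):

        # Hypothetical continuation: pit the loaded checkpoint against the
        # random agent and record the outcome for this model file.
        _, games_results = world.execute_games(agents,
                                               game,
                                               options.games_num,
                                               max_game_steps_n=options.max_steps,
                                               verbose=options.verbose,
                                               show_every_turn=options.debug)
        games_won = games_results[0]                    # assumed: first seat is the loaded model
        result = games_results[0] - games_results[1]    # assumed reward margin
        row_list.append((model, games_won, result))
        print_progress_bar(idx + 1, len(model_files), prefix='Progress:', suffix='Complete')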
Example 6
    agent_second = env_selector.get_agent(options.agent_profile_old)
    print("Pit with agent ", agent_second.name)
    agent_second.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(options.agent_profile_new)
    game = env_selector.get_game(agent_profile.game)

    agents = [agent_first, agent_second]

    if options.agent_new_path:
        agent_first.load(options.agent_new_path)
        if options.optimize_for_inference:
            agent_first.disable_training_capability(temp_dir="temp",
                                                    optimize=True)

    if options.agent_old_path:
        agent_second.load(options.agent_old_path)
        if options.optimize_for_inference:
            agent_second.disable_training_capability(temp_dir="temp",
                                                     optimize=True)

    world = World()

    sess_arena_examples, games_results = world.execute_games(
        agents,
        game,
        options.games_num,
        max_game_steps_n=options.max_steps,
        verbose=options.verbose,
        show_every_turn=options.debug)
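
A hedged follow-up showing one way the returned results could be summarized, mirroring the bookkeeping in the evaluate function of Example 1 (the assumption that index 0 belongs to agent_first comes from that example):

# Hypothetical summary; games_results[0] is assumed to be agent_first's reward.
new_reward = games_results[0]
old_reward = sum(games_results) - new_reward
print("New agent reward: %d, old agent reward: %d" % (new_reward, old_reward))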
Example 7
import pygame

from Mapper import Mapper
from core.World import World

# FPS, SIZE and TAIL_SIZE are defined elsewhere in the original module
# (compare Example 8, which uses FPS = 60, SIZE = 201 and TAIL = 3).
DELAY = 0
RHO = .005
ANTS = 30

GRASS_COLOR = (252, 233, 88)
ANT_COLOR = (135, 51, 25)
NEST_COLOR = (255, 255, 255)
FOOD_COLOR = (20, 100, 10)

pygame.init()
pygame.mixer.init()
screen = pygame.display.set_mode((SIZE * TAIL_SIZE, SIZE * TAIL_SIZE))
pygame.display.set_caption("Ants")
clock = pygame.time.Clock()

board = World(SIZE)
board.nest((int(SIZE / 2), int(SIZE / 2)))
board.food((10, 10))
for i in range(ANTS):
    board.breed()

mapper = Mapper(board, TAIL_SIZE, GRASS_COLOR, ANT_COLOR, NEST_COLOR,
                FOOD_COLOR)

running = True
while running:
    clock.tick(FPS)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
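
The excerpt ends before the simulation and drawing steps; a hedged sketch of how the loop body might continue, where the step and draw method names are assumptions rather than the project's actual API:

    # Hypothetical continuation of the loop body; method names are assumptions.
    board.step()                # advance the ant simulation by one tick
    mapper.draw(screen)         # render grass, ants, nest and food
    pygame.display.flip()

pygame.quit()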
Example 8
import pygame

from Mapper import Mapper
from core.World import World

FPS = 60
SIZE = 201
TAIL = 3

pygame.init()
pygame.mixer.init()
screen = pygame.display.set_mode((SIZE * TAIL, SIZE * TAIL))
pygame.display.set_caption("Ants 2")
clock = pygame.time.Clock()

world = World(SIZE)

# world.pheromones = np.array([
#     [0,0,0,0,0,0,0,0,0,0,0], #0
#     [0,0,0,0,0,0,0,0,0,0,0], #1
#     [0,0,0,0,0,0,0,0,0,0,0], #2
#     [0,0,0,0,0,0,0,0,0,0,0], #3
#     [0,0,0,0,0,0,0,0,0,0,0], #4
#     [0,0,0,0,0,0,0,6,0,6,0], #5
#     [0,0,0,0,0,0,0,0,0,0,0], #6
#     [0,0,0,0,0,0,0,0,0,0,0], #7
#     [0,0,0,0,0,0,0,0,0,0,0], #8
#     [0,0,0,0,0,0,0,0,0,0,0], #9
#     [0,0,0,0,0,0,0,0,0,0,0], #10
# ])
world.nest = (20, 20, 5)
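
The second script stops right after configuring the nest; a hedged continuation mirroring Example 7's structure, where the Mapper colours and the world.update()/mapper.draw() calls are placeholders rather than this version's real API:

# Hypothetical continuation mirroring Example 7; the colours and the
# update()/draw() calls below are placeholders, not the project's API.
mapper = Mapper(world, TAIL, (252, 233, 88), (135, 51, 25),
                (255, 255, 255), (20, 100, 10))

running = True
while running:
    clock.tick(FPS)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
    world.update()          # placeholder simulation step
    mapper.draw(screen)     # placeholder render call
    pygame.display.flip()

pygame.quit()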