def evaluate(agent_profile, agent_new_path, agent_old_path, games_num,
             experience_path=None, acceptance_rate=0.6,
             verbose=True, debug=False, max_steps=None,
             self_play_examples_deque=deque([])):
    print("Evaluating model with games_num %d and acceptance_rate %f"
          % (games_num, acceptance_rate))

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(agent_profile)
    agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(agent_profile)
    game = env_selector.get_game(agent_profile.game)

    # every seat starts with a copy of the old model ...
    agents = []
    for idx in range(game.get_players_num()):
        old_agent = agent.clone()
        old_agent.load(agent_old_path)
        agents.append(old_agent)

    # ... and the candidate model takes the first seat
    agent.load(agent_new_path)
    agents[0] = agent

    arena_games_results = [0] * len(agents)
    arena_examples = []
    arena_games_n = int(games_num / game.get_players_num())

    world = World()

    # rotate the seating order so the candidate plays every position
    for jdx in range(game.get_players_num()):
        playing_agents = shift_list(agents, jdx)

        sess_arena_examples, games_results = world.execute_games(playing_agents,
                                                                 game,
                                                                 arena_games_n,
                                                                 max_game_steps_n=max_steps,
                                                                 verbose=verbose,
                                                                 show_every_turn=debug)

        # shift results back so index 0 always refers to the candidate
        games_results = shift_list(games_results, -jdx)

        for index in range(len(arena_games_results)):
            arena_games_results[index] += games_results[index]

        arena_examples.extend(sess_arena_examples)

    self_play_examples_deque += arena_examples

    if experience_path:
        serialize(self_play_examples_deque, experience_path)

    cur_rewards = arena_games_results[0]
    other_rewards = sum(arena_games_results) - cur_rewards

    print("Current agent got rewards: %d\n"
          "Total reward across all other agents: %d"
          % (cur_rewards, other_rewards))

    # Assumed intent of the acceptance check: promote the new model when its
    # share of the total arena reward reaches acceptance_rate. The original
    # expression compared a boolean against acceptance_rate.
    total_rewards = cur_rewards + other_rewards
    updated = total_rewards > 0 and (cur_rewards / total_rewards) >= acceptance_rate

    return updated
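# The acceptance rule above is an interpretation: the flattened original compared
# a boolean against acceptance_rate. Below is a standalone, runnable sketch of the
# assumed "share of total reward" rule; is_accepted is a hypothetical helper and
# not part of the repository.
def is_accepted(cur_rewards, other_rewards, acceptance_rate=0.6):
    total = cur_rewards + other_rewards
    if total <= 0:
        return False  # no decisive reward collected -> keep the old model
    return cur_rewards / total >= acceptance_rate

print(is_accepted(28, 12))  # True: the candidate took 70% of the reward
print(is_accepted(20, 20))  # False: an even split does not clear the 0.6 threshold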
def reset(self):
    self.steps_done = 0
    # rebuild the world first, then the fields that depend on it
    self.world = World(self.n_agents, self.n_cities, self.steps, self.conn,
                       self.tasks, self.cities, self.rewards, self.destinations,
                       self.budget)
    self.remain_budget = self.get_budget()
    self.terminal = self.steps
    # -1 represents that a city has not been picked yet
    self.last_pick = [-1] * self.world.n_cities
def generate_self_play(opt_agent_profile, agent_path, games_num,
                       experience_path, max_steps, verbose, debug,
                       exploration_decay_steps, optimize_for_inference=False,
                       self_play_examples_deque=deque([])):
    world = World()

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(opt_agent_profile)
    agent.load(agent_path)

    agent_profile = env_selector.get_profile(opt_agent_profile)
    game = env_selector.get_game(agent_profile.game)

    if optimize_for_inference:
        agent.disable_training_capability()

    self_play_examples = world.generate_self_play(agent, game, games_num,
                                                  max_game_steps_n=max_steps,
                                                  verbose=verbose,
                                                  show_every_turn=debug,
                                                  exploration_decay_steps=exploration_decay_steps)

    self_play_examples_deque += self_play_examples

    serialize(self_play_examples_deque, experience_path)
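# Hypothetical driver sketch for the function above. The profile name, file
# paths, and parameter values are placeholders chosen for illustration and are
# not taken from the repository.
if __name__ == "__main__":
    generate_self_play(opt_agent_profile="my_agent_profile",      # placeholder profile name
                       agent_path="models/best.h5",               # placeholder checkpoint path
                       games_num=100,
                       experience_path="experience/self_play.pkl",
                       max_steps=200,
                       verbose=True,
                       debug=False,
                       exploration_decay_steps=30,
                       optimize_for_inference=True)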
def __init__(self, **kwargs):
    self.steps_done = 0
    self.n_agents = kwargs["n_agents"]
    self.n_cities = kwargs["n_cities"]
    self.steps = kwargs["steps"]
    self.conn = kwargs["conn"]
    self.tasks = kwargs["tasks"]
    self.cities = kwargs["cities"]
    self.rewards = kwargs["rewards"]
    self.destinations = kwargs["destinations"]
    self.budget = kwargs["budget"]
    self.world = World(self.n_agents, self.n_cities, self.steps, self.conn,
                       self.tasks, self.cities, self.rewards, self.destinations,
                       self.budget)
    self.remain_budget = self.get_budget()
    self.terminal = self.steps
    # -1 represents that a city has not been picked yet
    self.last_pick = [-1] * self.world.n_cities
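# Standalone illustration of the **kwargs pattern used by __init__ above:
# required settings are pulled out of a single config dict by name, so the
# environment can be constructed from one configuration object. TinyEnv and
# the values below are made up for the example and are not repository code.
class TinyEnv:
    def __init__(self, **kwargs):
        self.n_agents = kwargs["n_agents"]
        self.n_cities = kwargs["n_cities"]
        self.steps = kwargs["steps"]

config = {"n_agents": 2, "n_cities": 5, "steps": 50}
env = TinyEnv(**config)
print(env.n_agents, env.n_cities, env.steps)  # -> 2 5 50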
if not options.random_agent:
    parser.error('Random agent profile must be selected')

env_selector = EnvironmentSelector()

agent = env_selector.get_agent(options.agent)
print("Pit with agent ", agent.name)
agent.set_exploration_enabled(False)

random_agent = env_selector.get_agent(options.random_agent)
print("Pit with agent ", random_agent.name)
random_agent.set_exploration_enabled(False)

agent_profile = env_selector.get_profile(options.agent)
game = env_selector.get_game(agent_profile.game)

world = World()

agents = [agent, random_agent]

# collect every *.h5 checkpoint from the evaluation folder
model_files = [f for f in listdir(options.folder)
               if isfile(join(options.folder, f)) and f.endswith('h5')]

row_list = []

print_progress_bar(0, len(model_files), prefix='Progress:', suffix='Complete')

for idx, model in enumerate(model_files):
    model_path = str(options.folder) + "/" + model
    print(model_path)

    agent.load(model_path)

    result = 0
    games_won = 0
agent_second = env_selector.get_agent(options.agent_profile_old)
print("Pit with agent ", agent_second.name)
agent_second.set_exploration_enabled(False)

agent_profile = env_selector.get_profile(options.agent_profile_new)
game = env_selector.get_game(agent_profile.game)

agents = [agent_first, agent_second]

if options.agent_new_path:
    agent_first.load(options.agent_new_path)
    if options.optimize_for_inference:
        agent_first.disable_training_capability(temp_dir="temp", optimize=True)

if options.agent_old_path:
    agent_second.load(options.agent_old_path)
    if options.optimize_for_inference:
        agent_second.disable_training_capability(temp_dir="temp", optimize=True)

world = World()

sess_arena_examples, games_results = world.execute_games(agents,
                                                          game,
                                                          options.games_num,
                                                          max_game_steps_n=options.max_steps,
                                                          verbose=options.verbose,
                                                          show_every_turn=options.debug)
DELAY = 0
RHO = .005
ANTS = 30

GRASS_COLOR = (252, 233, 88)
ANT_COLOR = (135, 51, 25)
NEST_COLOR = (255, 255, 255)
FOOD_COLOR = (20, 100, 10)

pygame.init()
pygame.mixer.init()

screen = pygame.display.set_mode((SIZE * TAIL_SIZE, SIZE * TAIL_SIZE))
pygame.display.set_caption("Ants")
clock = pygame.time.Clock()

board = World(SIZE)
board.nest((int(SIZE / 2), int(SIZE / 2)))
board.food((10, 10))

# spawn the initial ant population
for i in range(ANTS):
    board.breed()

mapper = Mapper(board, TAIL_SIZE, GRASS_COLOR, ANT_COLOR, NEST_COLOR, FOOD_COLOR)

running = True
while running:
    clock.tick(FPS)

    # closing the window stops the simulation
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
import pygame

from Mapper import Mapper
from core.World import World

FPS = 60
SIZE = 201
TAIL = 3

pygame.init()
pygame.mixer.init()

screen = pygame.display.set_mode((SIZE * TAIL, SIZE * TAIL))
pygame.display.set_caption("Ants 2")
clock = pygame.time.Clock()

world = World(SIZE)
# world.pheromones = np.array([
#     [0,0,0,0,0,0,0,0,0,0,0],  # 0
#     [0,0,0,0,0,0,0,0,0,0,0],  # 1
#     [0,0,0,0,0,0,0,0,0,0,0],  # 2
#     [0,0,0,0,0,0,0,0,0,0,0],  # 3
#     [0,0,0,0,0,0,0,0,0,0,0],  # 4
#     [0,0,0,0,0,0,0,6,0,6,0],  # 5
#     [0,0,0,0,0,0,0,0,0,0,0],  # 6
#     [0,0,0,0,0,0,0,0,0,0,0],  # 7
#     [0,0,0,0,0,0,0,0,0,0,0],  # 8
#     [0,0,0,0,0,0,0,0,0,0,0],  # 9
#     [0,0,0,0,0,0,0,0,0,0,0],  # 10
# ])
world.nest = (20, 20, 5)