def setup_opponents(self):
    """Choose the opponent agent and assign the seats for the next episode.

    The opponent is selected according to ``self.opponent_type``:

    * ``'rules'``       -- a ``'greedy'`` agent, no model is loaded.
    * ``'random'``      -- a uniformly random entry of ``self.opponent_models``.
    * ``'best'``        -- the most recently loaded model.
    * ``'mostly_best'`` -- the most recent model when a uniform draw is
      below 0.8, otherwise a uniformly random one.
    * ``'base'``        -- the first entry of ``self.opponent_models``.

    For every type other than ``'rules'`` the current best model name is
    looked up first and, if it changed since the last call, the model is
    loaded and appended to ``self.opponent_models``.

    Afterwards ``self.agent_player_num`` is drawn at random and
    ``self.agents`` is filled with the opponent in every seat except that
    one, which is set to ``None``.
    """
    if self.opponent_type == 'rules':
        # The 'rules' opponent type is served by the 'greedy' agent.
        self.opponent_agent = Agent('greedy')
    else:
        # incremental load of new model
        best_model_name = get_best_model_name(self.name)
        if self.best_model_name != best_model_name:
            self.opponent_models.append(load_model(self, best_model_name))
            self.best_model_name = best_model_name

        models = self.opponent_models

        if self.opponent_type == 'random':
            # randint is inclusive on both ends, hence the "- 1".
            chosen = models[random.randint(0, len(models) - 1)]
            self.opponent_agent = Agent('ppo_opponent', chosen)

        elif self.opponent_type == 'best':
            self.opponent_agent = Agent('ppo_opponent', models[-1])

        elif self.opponent_type == 'mostly_best':
            # The uniform draw happens before the (optional) randint draw,
            # which keeps seeded runs reproducible.
            if random.uniform(0, 1) < 0.8:
                chosen = models[-1]
            else:
                chosen = models[random.randint(0, len(models) - 1)]
            self.opponent_agent = Agent('ppo_opponent', chosen)

        elif self.opponent_type == 'base':
            self.opponent_agent = Agent('base', models[0])

    self.agent_player_num = np.random.choice(self.n_players)
    self.agents = [self.opponent_agent] * self.n_players
    # The seat of the learning agent holds no opponent.
    self.agents[self.agent_player_num] = None

    try:
        # Only possible if self.players is defined on the base environment.
        logger.debug(
            f'Agent plays as Player {self.players[self.agent_player_num].id}'
        )
    except (AttributeError, IndexError, KeyError, TypeError):
        # Best-effort debug output: environments without a usable
        # ``players`` collection simply skip the message.
        pass
def __init__(self, env, name="AI_PPO", size=6):
    """Create the player and attach a PPO agent backed by a saved model.

    Args:
        env: passed through to ``load_model`` as its first argument.
        name: forwarded to the parent class initializer.
        size: inserted into the model file name, ``best_model{size}.zip``.
    """
    super().__init__(name)

    # Load the saved model for the requested size and wrap it in an Agent.
    model_file = f'best_model{size}.zip'
    self.ppo_agent = Agent('best_model', load_model(env, model_file))
def main(args):
    """Play ``args.games`` games between the configured agents.

    Attributes read from ``args``: ``debug``, ``env_name``, ``verbose``,
    ``manual``, ``seed``, ``recommend``, ``agents``, ``games``,
    ``randomise_players``, ``best``, ``cont`` and ``write_results``.

    Each entry of ``args.agents`` is either one of the special names
    ``'human'`` (action typed on the console), ``'rules'``, ``'json'``
    (action received over a ZeroMQ REP socket bound to port 5555) or
    ``'base'`` (model ``base.zip``); any other value is treated as the stem
    of a model file, ``<agent>.zip``.

    Raises:
        Exception: if the number of agents differs from ``env.n_players``.
    """
    logger.configure(config.LOGDIR)

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        logger.set_level(config.INFO)

    # make environment
    env = get_environment(args.env_name)(verbose=args.verbose,
                                         manual=args.manual)
    env.seed(args.seed)

    total_rewards = {}
    # The REP socket must receive before it may send, so the very first
    # 'json' turn skips the state reply and only waits for a request.
    first_time = True

    if args.recommend:
        ppo_model = load_model(env, 'best_model.zip')
        ppo_agent = Agent('best_model', ppo_model)
    else:
        ppo_agent = None

    agents = []

    # load the agents
    if len(args.agents) != env.n_players:
        raise Exception(
            f'{len(args.agents)} players specified but this is a {env.n_players} player game!'
        )

    for agent in args.agents:
        if agent == 'human':
            agent_obj = Agent('human')
        elif agent == 'rules':
            agent_obj = Agent('rules')
        elif agent == 'json':
            # Start mq server
            # NOTE(review): the socket/context are never closed explicitly
            # and a second 'json' agent would try to bind the same port.
            context = zmq.Context()
            socket = context.socket(zmq.REP)
            socket.bind("tcp://*:5555")
            logger.debug("zaq server start at 5555")
            agent_obj = Agent('json')
        elif agent == 'base':
            base_model = load_model(env, 'base.zip')
            agent_obj = Agent('base', base_model)
        else:
            ppo_model = load_model(env, f'{agent}.zip')
            agent_obj = Agent(agent, ppo_model)
        agents.append(agent_obj)
        total_rewards[agent_obj.id] = 0

    # play games
    logger.info(f'\nPlaying {args.games} games...')
    for game in range(args.games):
        players = agents[:]

        if args.randomise_players:
            random.shuffle(players)

        obs = env.reset()
        done = False

        for i, p in enumerate(players):
            logger.debug(f'Player {i+1} = {p.name}')

        while not done:
            current_player = players[env.current_player_num]
            env.render()
            logger.debug(f'\nCurrent player name: {current_player.name}')

            if args.recommend and current_player.name in [
                    'human', 'rules', 'json'
            ]:
                # show recommendation from last loaded model
                logger.debug(f'\nRecommendation by {ppo_agent.name}:')
                action = ppo_agent.choose_action(env,
                                                 choose_best_action=True,
                                                 mask_invalid_actions=True)

            if current_player.name == 'human':
                action = input('\nPlease choose an action: ')
                try:
                    # for int actions
                    action = int(action)
                except ValueError:
                    # for MultiDiscrete action input as list TODO
                    # SECURITY: eval() executes arbitrary expressions typed
                    # by the user; only acceptable for a trusted console.
                    action = eval(action)
            elif current_player.name == 'json':
                # This branch must be part of the same if/elif chain as the
                # 'human' branch; otherwise a human's typed action falls
                # through to the final ``else`` and gets overwritten.
                if not first_time:
                    game_state = {
                        "legal_action": [
                            i for i, o in enumerate(env.legal_actions)
                            if o != 0
                        ],
                        "tableCard": env.tableCard.id,
                    }
                    socket.send_json(game_state)
                action = socket.recv_json()
                first_time = False
                logger.debug(f'\nReceived {action}')
                try:
                    # for int actions
                    action = int(action)
                except (ValueError, TypeError):
                    # for MultiDiscrete action input as list TODO
                    # A JSON array is already decoded to a list and is used
                    # as-is; only string payloads are evaluated.
                    # SECURITY: eval() on data received from the network
                    # allows remote code execution -- restrict the bind
                    # address or replace with a safe parser.
                    if isinstance(action, str):
                        action = eval(action)
            elif current_player.name == 'rules':
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env, choose_best_action=False, mask_invalid_actions=True)
            else:
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env,
                    choose_best_action=args.best,
                    mask_invalid_actions=True)

            obs, reward, done, _ = env.step(action)

            for r, player in zip(reward, players):
                total_rewards[player.id] += r
                player.points += r

            if args.cont:
                input('Press any key to continue')

        env.render()

        logger.info(f"Played {game + 1} games: {total_rewards}")

        if args.write_results:
            write_results(players, game, args.games, env.turns_taken)

        # Per-game points are reset; total_rewards keeps accumulating.
        for p in players:
            p.points = 0

    env.close()
def main(args):
    """Play ``args.games`` games between the configured agents.

    Attributes read from ``args``: ``debug``, ``env_name``, ``verbose``,
    ``manual``, ``seed``, ``recommend``, ``agents``, ``games``,
    ``randomise_players``, ``best``, ``cont`` and ``write_results``.

    Each entry of ``args.agents`` is either one of the special names
    ``'human'``, ``'greedy'``, ``'rules'`` or ``'base'`` (model
    ``base.zip``); any other value is treated as the stem of a model file,
    ``<agent>.zip``. When ``args.env_name`` is ``"blobwar"`` a human
    player's move comes from a ``Human`` helper instead of the console.

    Raises:
        Exception: if the number of agents differs from ``env.n_players``.
    """
    logger.configure(config.LOGDIR)

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        logger.set_level(config.INFO)

    # make environment
    env = get_environment(args.env_name)(verbose=args.verbose,
                                         manual=args.manual)
    env.seed(args.seed)

    total_rewards = {}

    if args.recommend:
        ppo_model = load_model(env, 'best_model.zip')
        ppo_agent = Agent('best_model', ppo_model)
    else:
        ppo_agent = None

    agents = []

    # load the agents
    if len(args.agents) != env.n_players:
        raise Exception(f'{len(args.agents)} players specified but this is a {env.n_players} player game!')

    for agent in args.agents:
        if agent == 'human':
            agent_obj = Agent('human')
        elif agent == 'greedy':
            agent_obj = Agent('greedy')
        elif agent == 'rules':
            agent_obj = Agent('rules')
        elif agent == 'base':
            base_model = load_model(env, 'base.zip')
            agent_obj = Agent('base', base_model)
        else:
            ppo_model = load_model(env, f'{agent}.zip')
            agent_obj = Agent(agent, ppo_model)
        agents.append(agent_obj)
        total_rewards[agent_obj.id] = 0

    # Only created for blobwar; it is only used under the same condition.
    if args.env_name == "blobwar":
        human_blobwar = Human()

    # play games
    logger.info(f'\nPlaying {args.games} games...')
    for game in range(args.games):
        players = agents[:]

        if args.randomise_players:
            random.shuffle(players)

        obs = env.reset()
        done = False

        for i, p in enumerate(players):
            logger.debug(f'Player {i+1} = {p.name}')

        while not done:
            current_player = players[env.current_player_num]
            env.render()
            logger.debug(f'Current player name: {current_player.name}')

            if args.recommend and current_player.name in ['human', 'rules']:
                # show recommendation from last loaded model
                logger.debug(f'\nRecommendation by {ppo_agent.name}:')
                action = ppo_agent.choose_action(env,
                                                 choose_best_action=True,
                                                 mask_invalid_actions=True)

            if current_player.name == 'human':
                if args.env_name == "blobwar":
                    move = human_blobwar.compute_next_move(env.core)
                    action = env.encode_action(move)
                else:
                    action = input('\nPlease choose an action: ')
                    try:
                        action = int(action)
                    except ValueError:
                        # for MultiDiscrete action input as list TODO
                        # SECURITY: eval() executes arbitrary expressions
                        # typed by the user; only acceptable for a trusted
                        # console.
                        action = eval(action)
            elif current_player.name == 'rules':
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env, choose_best_action=False, mask_invalid_actions=True)
            else:
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env,
                    choose_best_action=args.best,
                    mask_invalid_actions=True)

            obs, reward, done, _ = env.step(action)

            for r, player in zip(reward, players):
                total_rewards[player.id] += r
                player.points += r

            if args.cont:
                input('Press any key to continue')

        env.render()

        logger.info(f"Played {game + 1} games: {total_rewards}")

        if args.write_results:
            write_results(players, game, args.games, env.turns_taken)

        # Per-game points are reset; total_rewards keeps accumulating.
        for p in players:
            p.points = 0

    env.close()