def test_load(self):
    """A registered model id loads successfully; an unknown id raises ValueError."""
    entry = 'rlcard3.models.pretrained_models:LeducHoldemNFSPModel'
    register(model_id='test_load', entry_point=entry)
    models.load('test_load')
    # An id that was never registered must be rejected.
    with self.assertRaises(ValueError):
        load('test_random_make')
def _load_model(self):
    """ Load pretrained/rule model

    :return: A Model object
    """
    player_count = self.game.get_player_num()
    return models.load('mocsar-rule-v1', num_players=player_count)
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    model_id = 'leduc-holdem-cfr'
    return models.load(model_id)
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    model_id = 'uno-rule-v1'
    return models.load(model_id)
def _load_model(self): ''' Load pretrained/rule model Returns: model (Model): A Model object ''' assert False # FIXME: stub return models.load('uno-rule-v1') # FIXME: stub
def _load_model(self):
    """ Load pretrained/rule model based on config.

    :return: A Model object
    """
    game = self.game
    return models.load(
        'mocsar-cfg',
        num_players=game.get_player_num(),
        action_num=game.get_action_num(),
        state_shape=self.state_shape,
    )
def get_agents(agents: Dict, nr_players: int, action_num: int, state_shape: List):
    """ Initialize agents to play the game.

    Builds a flat list of at most ``nr_players`` agents, consuming the
    entries of ``agents`` in dict order and returning as soon as enough
    agents have been collected.

    :param agents: Dictionary of agent_name: number of agents pairs
    :param nr_players: Number of players, amount of agents generated
    :param action_num: Number of actions, forwarded to pretrained model loaders
    :param state_shape: State shape, forwarded to pretrained model loaders
    """
    agent_list = list()
    i = 0  # count of agents collected so far, across all dict entries
    for agent_id, nr_agents in agents.items():
        if agent_id == 'mocsar-nfsp-pytorch':
            # Pre trained model from rlcard3.models.pretrained_models, NFSP has multiple (four) agents in it
            # Here we directly load NFSP models from /models module
            nfsp_agents = models.load(agent_id,
                                      num_players=nr_players,
                                      action_num=action_num,
                                      state_shape=state_shape).agents
            # Take distinct NFSP agents (index j) rather than the same one.
            for j in range(nr_agents):
                agent_list.append(nfsp_agents[j])
                i += 1
                if i >= nr_players:
                    return agent_list
            # NOTE(review): control falls through to the generic loop below,
            # whose id list also contains 'mocsar-nfsp-pytorch' — the same
            # entry may then be added again via .agents[0]. Confirm this
            # double-add is intended.
        for _ in range(nr_agents):
            if agent_id in [
                    'mocsar-dqn-pytorch', 'mocsar-dqn-pytorchr',
                    'mocsar-nfsp-pytorch', 'mocsar-nfsp-pytorchm'
            ]:
                # Pre trained model from rlcard3.models.pretrained_models, DQN
                rule_agent = models.load(agent_id,
                                         num_players=nr_players,
                                         action_num=action_num,
                                         state_shape=state_shape).agents[0]
            else:
                # Models from model_agents
                rule_agent = load(agent_id=agent_id)
            agent_list.append(rule_agent)
            i += 1
            if i >= nr_players:
                return agent_list
'''
Another example of loading a pre-trained NFSP model on Leduc Hold'em.
Here, we directly load the model from the model zoo.
'''
import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models

# Build the Leduc Hold'em environment.
env = rlcard3.make('leduc-holdem')

# Fix the global seed for reproducibility.
set_global_seed(0)

# Load the pretrained NFSP agents straight from the /models module.
trained_agents = models.load('leduc-holdem-nfsp-pytorch').agents

# Evaluate: pit the first NFSP agent against a random opponent.
evaluate_num = 10000
opponent = RandomAgent(env.action_num)
env.set_agents([trained_agents[0], opponent])

reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
# Training hyper-parameters.
# ``evaluate_every`` was referenced by the loop below but never defined in
# this chunk, so the first ``episode % evaluate_every`` check raised
# NameError. Define it explicitly here.
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initilize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()
''' A toy example of playing against pretrained AI on Leduc Hold'em
'''

import rlcard3
from rlcard3 import models
from rlcard3.agents.leduc_holdem_human_agent import HumanAgent
from rlcard3.utils.utils import print_card

# Make environment and enable human mode
# Set 'record_action' to True because we need it to print results
env = rlcard3.make('leduc-holdem', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
# Pretrained CFR model; only its first agent is used here.
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print other players action
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    # Walk the action record backwards until the current player's own move
    # is reached, collecting the opponents' trailing actions in play order.
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    # NOTE(review): this chunk appears truncated — _action_list is built but
    # never used in the visible code; presumably it is printed further down.
    # Confirm against the full source file.
'''
Another example of loading a pre-trained NFSP model on Leduc Hold'em.
Here, we directly load the model from the model zoo.
'''
import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models

# Build the Leduc Hold'em environment.
env = rlcard3.make('leduc-holdem')

# Fix the global seed for reproducibility.
set_global_seed(0)

# Load the pretrained NFSP agents straight from the /models module.
trained_agents = models.load('leduc-holdem-nfsp').agents

# Evaluate: pit the first NFSP agent against a random opponent.
evaluate_num = 10000
opponent = RandomAgent(env.action_num)
env.set_agents([trained_agents[0], opponent])

reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models
from rlcard3.utils.config_read import Config

# Build the Mocsar environment.
env = rlcard3.make('mocsar')

# Read the comparison parameters from the properties file.
conf = Config('environ.properties')
evaluate_num = conf.get_int(section='cfg.compare', key='nr_games')
agent_str = conf.get_str(section='cfg.compare', key="agent_str")
nr_cards = conf.get_int(section='global', key='nr_cards')

# Set a global seed
#set_global_seed(0)

# Load the configured pretrained agents straight from the /models module.
loaded_agents = models.load(agent_str,
                            num_players=env.game.get_player_num(),
                            action_num=env.action_num,
                            state_shape=env.state_shape).agents

# Pit the first loaded agent against three random opponents.
rnd_agent = RandomAgent(env.action_num)
env.game.set_game_params(num_players=4, num_cards=nr_cards)
env.set_agents([loaded_agents[0], rnd_agent, rnd_agent, rnd_agent])

reward = tournament(env, evaluate_num)[0]
print(
    f'Average reward for {agent_str} against random agent: {reward}, cards: {nr_cards} '
)