def test_train(self):
    env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
    agent = CFRAgent(env)

    for _ in range(100):
        agent.train()

    state = {'obs': np.array([1., 1., 0., 0., 0., 0.]),
             'legal_actions': [0, 2]}
    action, _ = agent.eval_step(state)

    self.assertIn(action, [0, 2])
def test_save_and_load(self):
    env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
    agent = CFRAgent(env)

    for _ in range(100):
        agent.train()
    agent.save()

    new_agent = CFRAgent(env)
    new_agent.load()
    self.assertEqual(len(agent.policy), len(new_agent.policy))
    self.assertEqual(len(agent.average_policy), len(new_agent.average_policy))
    self.assertEqual(len(agent.regrets), len(new_agent.regrets))
    self.assertEqual(agent.iteration, new_agent.iteration)
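# A minimal harness for running the two tests above, sketched under the
# assumption that they live in a unittest.TestCase subclass; the class name
# TestLeducCFR is hypothetical, and the imports cover the symbols the tests use.
import unittest

import numpy as np
import rlcard
from rlcard.agents import CFRAgent

class TestLeducCFR(unittest.TestCase):
    # place test_train and test_save_and_load here
    pass

if __name__ == '__main__':
    unittest.main()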
class LeducHoldemCFRModel(Model):
    ''' A pretrained CFR model on Leduc Hold'em '''

    def __init__(self):
        ''' Load pretrained model '''
        env = rlcard.make('leduc-holdem')
        self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
        self.agent.load()

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note:
            Each agent should be just like an RL agent with step and
            eval_step functioning well.
        '''
        return [self.agent, self.agent]
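# Usage sketch: once this class is registered in RLCard's model zoo, it can be
# loaded through the models API. The registration id 'leduc-holdem-cfr' below
# is an assumption and should match the actual entry point.
import rlcard
from rlcard import models

env = rlcard.make('leduc-holdem')
env.set_agents(models.load('leduc-holdem-cfr').agents)  # both seats share the pretrained agent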
import rlcard
from rlcard import models
from rlcard.agents import CFRAgent
from rlcard.utils import set_global_seed, tournament, Logger

# Make environments (env setup assumed; CFR requires allow_step_back)
env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem')

# Set the number of iterations and how frequently we evaluate performance and save the model
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])  # assumed completion of the truncated call
def load_cfr_leduc_agent(model_path):
    ''' Load a pretrained CFR agent for Leduc Hold'em from model_path '''
    env = rlcard.make('leduc-holdem')  # CFRAgent needs an environment to define the state/action space
    agent = CFRAgent(env, model_path=model_path)
    agent.load()
    return agent
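# Usage sketch for the helper above; the model directory is hypothetical and
# should point at a directory previously written by agent.save().
import numpy as np

agent = load_cfr_leduc_agent('./models/leduc_holdem_cfr')  # hypothetical path
state = {'obs': np.array([1., 1., 0., 0., 0., 0.]),
         'legal_actions': [0, 2]}
action, _ = agent.eval_step(state)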
import rlcard
from rlcard.agents import CFRAgent
from rlcard.utils import set_global_seed, Logger

# Make environments (setup assumed; CFR requires allow_step_back)
env = rlcard.make('mahjong', config={'allow_step_back': True})
eval_env = rlcard.make('mahjong')

# Set the number of iterations and how frequently we evaluate performance and save the plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000000

# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_cfr_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)

# Init a Logger to plot the learning curve
logger = Logger(root_path)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    agent.save()

    # Evaluate the performance by playing on eval_env
    if episode % evaluate_every == 0:
        reward = 0
        for eval_episode in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
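        # Assumed continuation (the snippet is truncated here): average the
        # accumulated payoff and record it on the learning curve.
        logger.log_performance(env.timestep, float(reward) / evaluate_num)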
import rlcard
from rlcard.agents import CFRAgent, RandomAgent
from rlcard.utils import set_global_seed, Logger
# BRAgent (the best-response agent) is assumed to come from the local codebase

# Make environments (env setup reconstructed; CFR requires allow_step_back)
env = rlcard.make('leduc-holdem', config={
    'allow_step_back': True,
})
eval_env = rlcard.make('leduc-holdem')

# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_br_result/'

# Set a global seed
set_global_seed(0)

# Initialize the CFR agent as the opponent for best response
opponent = CFRAgent(env)
#opponent = RandomAgent(action_num=env.action_num)
#opponent.load()  # If we have a saved model, we first load the model
#agent = RandomAgent(action_num=env.action_num)
agent = BRAgent(eval_env, opponent)
#agent = CFRAgent(env)
eval_env.set_agents([agent, opponent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    opponent.train()
    #agent.train()
    print('\rIteration {}'.format(episode), end='')
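    # Assumed continuation (the script is truncated here): periodically pit the
    # best-response agent against the training opponent, mirroring the other
    # examples (tournament comes from rlcard.utils).
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])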