def __init__(self):
    ''' Load the pretrained CFR model for Leduc Hold'em
    '''
    # Relies on os, rlcard, CFRAgent, and ROOT_PATH being in scope
    # (see the import sketch below this example)
    env = rlcard.make('leduc-holdem')
    self.agent = CFRAgent(env,
                          model_path=os.path.join(ROOT_PATH,
                                                  'leduc_holdem_cfr'))
    self.agent.load()
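For context, the names this method relies on can be assembled as below; the import locations are assumptions based on rlcard's layout (older releases expose the agent as rlcard.agents.cfr_agent.CFRAgent):

import os

import rlcard
from rlcard.agents import CFRAgent                      # assumed import path
from rlcard.models.pretrained_models import ROOT_PATH   # assumed import path

env = rlcard.make('leduc-holdem')
agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
agent.load()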
Example #2
    def test_train(self):
        # CFR traverses the game tree, so step_back must be enabled
        env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
        agent = CFRAgent(env)

        # Each call to train() runs one CFR iteration
        for _ in range(100):
            agent.train()

        state = {
            'obs': np.array([1., 1., 0., 0., 0., 0.]),
            'legal_actions': [0, 2]
        }
        action, _ = agent.eval_step(state)

        # The chosen action must be one of the legal actions
        self.assertIn(action, [0, 2])
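Beyond the single assertion, training quality can be sanity-checked by playing against a random baseline. A standalone sketch, assuming RandomAgent and tournament from the same-era rlcard API:

import rlcard
from rlcard.agents import CFRAgent, RandomAgent   # assumed import paths
from rlcard.utils import tournament               # assumed import path

env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
agent = CFRAgent(env)
for _ in range(100):
    agent.train()

eval_env = rlcard.make('leduc-holdem')
eval_env.set_agents([agent, RandomAgent(action_num=eval_env.action_num)])
avg_payoff = tournament(eval_env, 1000)[0]   # CFR agent's mean payoff per hand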
Example #3
    def test_save_and_load(self):
        env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
        agent = CFRAgent(env)

        for _ in range(100):
            agent.train()

        agent.save()

        # A fresh agent restored from disk should carry the same tables
        new_agent = CFRAgent(env)
        new_agent.load()
        self.assertEqual(len(agent.policy), len(new_agent.policy))
        self.assertEqual(len(agent.average_policy),
                         len(new_agent.average_policy))
        self.assertEqual(len(agent.regrets), len(new_agent.regrets))
        self.assertEqual(agent.iteration, new_agent.iteration)
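The same round trip can be written outside unittest as a standalone sketch; the explicit './cfr_model' path is an assumption about the default checkpoint directory:

import rlcard
from rlcard.agents import CFRAgent   # assumed import path

env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
agent = CFRAgent(env, model_path='./cfr_model')   # path is an assumption
agent.train()
agent.save()

restored = CFRAgent(env, model_path='./cfr_model')
restored.load()
assert restored.iteration == agent.iteration   # restored state matches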
Example #4
class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Holdem with CFR
    '''

    def __init__(self):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
        self.agent.load()

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, i.e. expose working
              step and eval_step methods.
        '''
        return [self.agent, self.agent]
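A hedged usage sketch of this model class via the registry; the id 'leduc-holdem-cfr' is an assumption mirroring the 'leduc-holdem-nfsp' id used in Example #5:

import rlcard
from rlcard import models

model = models.load('leduc-holdem-cfr')   # assumed registry id
eval_env = rlcard.make('leduc-holdem')
eval_env.set_agents(model.agents)         # one agent per seat, as returned above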
Example #5
# Training environment (assumed; CFR requires allow_step_back) and evaluation environment
env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem')

# Set the number of iterations and how frequently we evaluate performance and save the model
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize the CFR agent
agent = CFRAgent(env)
agent.load()  # If a saved model exists, load it first

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])  # assumes rlcard.utils.tournament
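# After the training loop: flush logs, plot the curve, save a final model
# (a sketch; close_files/plot are assumed from the same-era Logger API)
logger.close_files()
logger.plot('CFR')
agent.save()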
Example #6
def load_cfr_leduc_agent(model_path):
    # Build the env locally so the helper is self-contained
    env = rlcard.make('leduc-holdem')
    agent = CFRAgent(env, model_path=model_path)
    agent.load()
    return agent
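A one-line usage sketch (the checkpoint path here is hypothetical):

agent = load_cfr_leduc_agent('./experiments/leduc_holdem_cfr_result/cfr_model')   # hypothetical path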
Example #7
# Environments (assumed from the result path below; CFR needs step_back enabled)
env = rlcard.make('mahjong', config={'allow_step_back': True})
eval_env = rlcard.make('mahjong')

# Set the number of iterations and how frequently we evaluate and save plots
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000000

# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_cfr_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

# Initialize the CFR agent
agent = CFRAgent(env)

# Init a Logger to plot the learning curve
logger = Logger(root_path)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance periodically and checkpoint the model
    if episode % evaluate_every == 0:
        agent.save()  # checkpoint at evaluation points; saving every episode is wasteful
        reward = 0
        for eval_episode in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)

            reward += payoffs[0]
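        # The usual continuation (a sketch, assuming the same Logger API as
        # Example #5): average the payoff over the evaluation episodes and log it
        reward = reward / evaluate_num
        logger.log_performance(env.timestep, reward)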
Example #8
# Environments (assumed: Leduc Hold'em, matching the result path below)
env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem')

# Set the number of iterations and how frequently we evaluate/save plots
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_br_result/'

# Set a global seed
set_global_seed(0)

# Initialize the CFR agent (used here as the opponent)
opponent = CFRAgent(env)
#opponent = RandomAgent(action_num=env.action_num)
#opponent.load()  # If we have saved model, we first load the model

#agent = RandomAgent(action_num=env.action_num)
agent = BRAgent(eval_env, opponent)
#agent = CFRAgent(env)

eval_env.set_agents([agent, opponent])
# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    opponent.train()
    #agent.train()
    print('\rIteration {}'.format(episode), end='')
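    # A sketch of the periodic evaluation that would typically follow
    # (assumes rlcard.utils.tournament, consistent with the other examples)
    if episode % evaluate_every == 0:
        logger.log_performance(eval_env.timestep,
                               tournament(eval_env, evaluate_num)[0])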