Example #1
    def test_load(self):
        register(
            model_id='test_load',
            entry_point='rlcard.models.pretrained_models:LeducHoldemNFSPModel')
        models.load('test_load')
        with self.assertRaises(ValueError):
            load('test_random_make')
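Example #1 exercises the register-then-load API from rlcard.models.registration: a model id is registered with an entry point of the form '<module>:<class>', can then be loaded by id, and an unregistered id raises ValueError. A minimal sketch of registering a custom pretrained model (the model id, package path, and class below are hypothetical):

from rlcard.models.registration import register, load

# Hypothetical id and entry point; the entry point must resolve to a Model subclass.
register(
    model_id='my-game-rule-v1',
    entry_point='my_package.my_models:MyRuleModel')

# Load by id elsewhere in the code base and grab its agents.
model = load('my-game-rule-v1')
agents = model.agents

# load() raises ValueError for an id that was never registered,
# which is exactly what the test above asserts.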
Example #2
    def load_model(self) -> dict:
        """
        Load pretrained/rule model
        :return: a dictionary with three models corresponding to each game part
        """
        return {
            'BID': models.load('tarot-bid-rule-v1'),
            'DOG': models.load('tarot-dog-rule-v1'),
            'MAIN': models.load('tarot-rule-v1')
        }
Example #3
    def _load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        return models.load('leduc-holdem-cfr')
Example #4
    def load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        return models.load(self.modelname)
Example #5
    def _load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        return models.load('uno-rule-v1')
Example #6
    def _load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        from rlcard import models
        return models.load('kuhn-poker-cfr')
Example #7
    def _load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        from rlcard import models
        return models.load('whale-rule-v1')
Example #8
    def _load_model(self):
        ''' Load pretrained/rule model
        Returns:
            model (Model): A Model object
        '''
        from rlcard import models
        # return models.load('nolimit-holdem-random')
        return models.load('nolimit-holdem-tp')
Example #9
    def _load_model(self):
        ''' Load pretrained/rule model

        Returns:
            model (Model): A Model object
        '''
        assert False  # FIXME: stub
        return models.load('uno-rule-v1')  # FIXME: stub
Example #10
    def _load_model(self):
        """Load pretrained/rule model

        Returns:
            model (Model): A Model object
        """
        from rlcard import models

        return models.load("leduc-holdem-cfr")
Example #11
import os

def load_model(model_path, env=None, position=None, device=None):
    if os.path.isfile(model_path):  # Torch model
        import torch
        agent = torch.load(model_path, map_location=device)
        agent.set_device(device)
    elif os.path.isdir(model_path):  # CFR model
        from rlcard.agents import CFRAgent
        agent = CFRAgent(env, model_path)
        agent.load()
    elif model_path == 'random':  # Random model
        from rlcard.agents import RandomAgent
        agent = RandomAgent(num_actions=env.num_actions)
    else:  # A model in the model zoo
        from rlcard import models
        agent = models.load(model_path).agents[position]

    return agent
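Example #11 dispatches on the form of model_path: an existing file is loaded as a saved Torch agent, a directory as a CFR checkpoint, the literal string 'random' becomes a RandomAgent, and anything else is treated as a model-zoo id. A hedged usage sketch, using only the zoo-id and 'random' branches so no checkpoint on disk is assumed:

import rlcard

env = rlcard.make('leduc-holdem')

# Zoo id: resolved through rlcard.models; the agent at seat 0 is returned.
zoo_agent = load_model('leduc-holdem-cfr', env=env, position=0)

# Literal 'random': a RandomAgent sized to the environment's action space.
random_agent = load_model('random', env=env)

env.set_agents([zoo_agent, random_agent])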
Example #12
def train_leduc():
    # Make environment; allow_step_back is required for CFR-style traversal
    env = rlcard.make('leduc-holdem',
                      config={
                          'seed': 0,
                          'allow_step_back': True
                      })
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Set the iteration numbers and how frequently we evaluate the performance and save the model
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 10000

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_oscfr_result/'

    # Set a global seed
    set_global_seed(0)

    # Initialize CFR Agent
    model_path = 'models/leduc_holdem_oscfr'
    agent = OutcomeSampling_CFR(env, model_path=model_path)
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against pre-trained NFSP
    eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):
        agent.train()
        print('\rIteration {}'.format(episode), end='')
        # Evaluate the performance. Play with NFSP agents.
        if episode % evaluate_every == 0:
            agent.save()  # Save model
            logger.log_performance(env.timestep,
                                   tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('OSCFR')
Example #13
def main():
    # Make environment
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'env_num': 4})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'env_num': 4})

    # Set the iteration numbers and how frequently we evaluate the performance
    evaluate_every = 100
    evaluate_num = 10000
    episode_num = 800000

    # The initial memory size
    memory_init_size = 1000

    # Train the agent every X steps
    train_every = 1

    _reward_max = -0.5

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_dqn_result/'

    # Set a global seed
    set_global_seed(0)

    with tf.Session() as sess:

        # Initialize a global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set up the agents
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         train_every=train_every,
                         state_shape=env.state_shape,
                         mlp_layers=[128, 128])
        # random_agent = RandomAgent(action_num=eval_env.action_num)
        cfr_agent = models.load('leduc-holdem-cfr').agents[0]
        env.set_agents([agent, agent])
        eval_env.set_agents([agent, cfr_agent])

        # Initialize global variables
        sess.run(tf.global_variables_initializer())

        # Init a Logger to plot the learning curve
        logger = Logger(log_dir)

        saver = tf.train.Saver()
        save_dir = 'models/leduc_holdem_dqn'
        saver.restore(sess, os.path.join(save_dir, 'model'))

        for episode in range(episode_num):

            # Generate data from the environment
            trajectories, _ = env.run(is_training=True)

            # Feed transitions into agent memory, and train the agent
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance. Play against the pre-trained CFR agent.
            if episode % evaluate_every == 0:
                _reward = tournament(eval_env, evaluate_num)[0]
                logger.log_performance(episode, _reward)
                if _reward > _reward_max:
                    # Save model
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)
                    saver.save(sess, os.path.join(save_dir, 'model'))
                    _reward_max = _reward

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN')
Example #14
''' Another example of loading a pre-trained NFSP model on Leduc Hold'em
    Here, we directly load the model from the model zoo
'''
import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed, tournament
from rlcard import models

# Make environment
env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set a global seed
set_global_seed(0)

# Here we directly load NFSP models from /models module
nfsp_agents = models.load('leduc-holdem-nfsp').agents

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)

Example #15
from rlcard import models
from yaniv_env import YanivEnv
from rlcard.agents.random_agent import RandomAgent

config = {}
env = YanivEnv(config)

random_agent = RandomAgent(action_num=env.action_num)
models.register('yaniv-dqn', 'pretrained_model:YanivDQNModel')
dqn_agent = models.load('yaniv-dqn').agents[0]
env.set_agents([dqn_agent, random_agent])

print(">> Yaniv pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)

    final_state = trajectories[0][-1][-2]

    #
    # print('===============     Result     ===============')
    # if payoffs[0] > 0:
    #     print('You win {} chips!'.format(payoffs[0]))
    # elif payoffs[0] == 0:
    #     print('It is a tie.')
    # else:
    #     print('You lose {} chips!'.format(-payoffs[0]))
    # print('')
Example #16
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, load it first

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()
Example #17
import rlcard
from rlcard import models
from rlcard.utils import set_global_seed

from rlcard.games.gin_rummy.player import GinRummyPlayer
from rlcard.games.gin_rummy.utils.move import DealHandMove

# Make environment
env = rlcard.make('gin-rummy', config={'seed': 0})
episode_num = 1
env.game.settings.print_settings()

# Set a global seed
set_global_seed(0)

# Set up agents
agents = models.load("gin-rummy-novice-rule").agents  # use novice agents rather than random agents
env.set_agents(agents)

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))

    # print move sheet
    print("\n========== Move Sheet ==========")
    move_sheet = env.game.round.move_sheet
Example #18
nfsp_agent = load_nfsp_leduc_agent(nfsp_model_path)
print("loaded NFSP leduc agent")

dqn_agent = load_dqn_leduc_agent('models/leduc_holdem_dqn')
print("loaded DQN leduc agent")

dqn_agent_rb = load_dqn_leduc_agent('models/leduc_holdem_dqn_rule_based')
print("loaded DQN RuleBased leduc agent")

cfr_agent = load_cfr_leduc_agent('models/cfr_model')
print("loaded CFR leduc agent")

cfr_agent_rb = load_cfr_leduc_agent('models/cfr_rule_based_model')
print("loaded CFR rule based leduc agent")

rule_based_agent = models.load('leduc-holdem-rule-v1').rule_agents[0]
print("loaded Leduc Rule Based Agent")

random_agent = RandomAgent(action_num=env.action_num)

# agents = [nfsp_agent, dqn_agent, dqn_agent_rb, cfr_agent, cfr_agent_rb, random_agent, rule_based_agent]
agents_dict = {
    "NFSP": nfsp_agent,
    "DQN": dqn_agent,
    "DQN Rule Based": dqn_agent_rb,
    "CFR": cfr_agent,
    "CFR Rule Based": cfr_agent_rb,
    "Random": random_agent,
    "Rule Based": rule_based_agent
}
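Example #18 collects the loaded agents in agents_dict, presumably for head-to-head evaluation. A hedged sketch of a round-robin over those agents; it assumes env is the Leduc Hold'em environment and tournament from rlcard.utils is already imported, as in the surrounding examples, and the number of hands is illustrative:

from itertools import combinations

evaluate_num = 1000  # hands per pairing (illustrative value)

for (name_a, agent_a), (name_b, agent_b) in combinations(agents_dict.items(), 2):
    env.set_agents([agent_a, agent_b])
    payoffs = tournament(env, evaluate_num)
    print('{} vs {}: average payoff {:.4f}'.format(name_a, name_b, payoffs[0]))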
Example #19
import rlcard
from rlcard import models
from rlcard.agents import NolimitholdemHumanAgent as HumanAgent
from rlcard.utils import print_card, set_global_seed

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('no-limit-holdem', config={'record_action': True})
human_agent = HumanAgent(action_num=env.action_num)
nfsp_agent = models.load('nolimit-holdem-nfsp').agents[0]
env.set_agents([human_agent, nfsp_agent])

print(">> No Limit Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])
Example #20
''' A toy example of playing against a pretrained AI on Leduc Hold'em
'''

import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
env = rlcard.make('leduc-holdem')
human_agent = HumanAgent(env.num_actions)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])
Example #21
''' A toy example of playing against rule-based bot on UNO
'''

import rlcard
from rlcard import models
from rlcard.agents.uno_human_agent import HumanAgent, _print_action

# Make environment and enable human mode
# Set 'record_action' to True because we need it to print results
env = rlcard.make('uno', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
rule_agent = models.load('uno-rule-v1').agents[0]
env.set_agents([human_agent, rule_agent])

print(">> UNO rule model V1")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses ', end='')
        _print_action(pair[1])
        print('')
Example #22
import rlcard
from rlcard import models
from rlcard.agents import LimitholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('limit-holdem', config={'seed': 0, 'record_action': True})
human_agent = HumanAgent(env.action_num)
dqn_agent = models.load('limit-holdem-dqn').agents[0]
env.set_agents([human_agent, dqn_agent])

print(">> Limit Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    if len(trajectories[0]) != 0:
        final_state = trajectories[0][-1][-2]
        action_record = final_state['action_record']
        state = final_state['raw_obs']
        _action_list = []
        for i in range(1, len(action_record) + 1):
            """
            if action_record[-i][0] == state['current_player']:
                break
            """
            _action_list.insert(0, action_record[-i])
Example #23
''' A toy example of playing against rule-based bot on UNO
'''
import tensorflow as tf
import rlcard
from rlcard import models
from rlcard.agents import DQNAgent
from rlcard.agents.uno_human_agent import HumanAgent, _print_action

# Make environment and enable human mode
# Set 'record_action' to True because we need it to print results
env = rlcard.make('uno', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
dqn_agent = models.load('uno-dqn').agents[0]
env.set_agents([human_agent, dqn_agent])

print(">> UNO dqn")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
Example #24
''' Another example of loading a pre-trained NFSP model on Leduc Hold'em
    Here, we directly load the model from the model zoo
'''
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed, tournament
from rlcard import models

# Make environment
env = rlcard.make('leduc-holdem')

# Set a global seed
set_global_seed(0)

# Here we directly load NFSP models from /models module
nfsp_agents = models.load('leduc-holdem-nfsp-pytorch').agents

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)