def test_load(self):
    register(model_id='test_load', entry_point='rlcard.models.pretrained_models:LeducHoldemNFSPModel')
    models.load('test_load')
    with self.assertRaises(ValueError):
        load('test_random_make')
def load_model(self) -> dict:
    """ Load pretrained/rule model

    :return: a dictionary with three models corresponding to each game part
    """
    return {
        'BID': models.load('tarot-bid-rule-v1'),
        'DOG': models.load('tarot-dog-rule-v1'),
        'MAIN': models.load('tarot-rule-v1')
    }
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    return models.load('leduc-holdem-cfr')
def load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    return models.load(self.modelname)
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    return models.load('uno-rule-v1')
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    from rlcard import models
    return models.load('kuhn-poker-cfr')
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    from rlcard import models
    return models.load('whale-rule-v1')
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    from rlcard import models
    # return models.load('nolimit-holdem-random')
    return models.load('nolimit-holdem-tp')
def _load_model(self):
    ''' Load pretrained/rule model

    Returns:
        model (Model): A Model object
    '''
    assert False  # FIXME: stub
    return models.load('uno-rule-v1')  # FIXME: stub
def _load_model(self): """Load pretrained/rule model Returns: model (Model): A Model object """ from rlcard import models return models.load("leduc-holdem-cfr")
import os


def load_model(model_path, env=None, position=None, device=None):
    ''' Load an agent from a checkpoint file, a CFR directory, the 'random' shorthand, or the model zoo '''
    if os.path.isfile(model_path):  # Torch model
        import torch
        agent = torch.load(model_path, map_location=device)
        agent.set_device(device)
    elif os.path.isdir(model_path):  # CFR model
        from rlcard.agents import CFRAgent
        agent = CFRAgent(env, model_path)
        agent.load()
    elif model_path == 'random':  # Random model
        from rlcard.agents import RandomAgent
        agent = RandomAgent(num_actions=env.num_actions)
    else:  # A model in the model zoo
        from rlcard import models
        agent = models.load(model_path).agents[position]
    return agent
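A minimal usage sketch of the helper above (an illustration, not taken from the original source): the 'leduc-holdem' environment id and the 'leduc-holdem-cfr' model-zoo name appear in the other snippets here, and the newer rlcard API with env.num_actions is assumed.

# Usage sketch for load_model() above (assumed setup, not from the original source)
import rlcard

env = rlcard.make('leduc-holdem')

# 'random' is handled explicitly by the helper and needs nothing on disk
random_agent = load_model('random', env=env)

# Any name that is not a file or directory falls through to the model zoo;
# 'position' selects which seat's agent to return
zoo_agent = load_model('leduc-holdem-cfr', env=env, position=0)

env.set_agents([zoo_agent, random_agent])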
def train_leduc():
    # Make environment and enable step back, which outcome-sampling CFR needs
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Set the iteration numbers and how frequently we evaluate the performance and save the model
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 10000

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_oscfr_result/'

    # Set a global seed
    set_global_seed(0)

    # Initialize the outcome-sampling CFR agent
    model_path = 'models/leduc_holdem_oscfr'
    agent = OutcomeSampling_CFR(env, model_path=model_path)
    agent.load()  # If we have a saved model, we first load the model

    # Evaluate CFR against pre-trained NFSP
    eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):
        agent.train()
        print('\rIteration {}'.format(episode), end='')

        # Evaluate the performance. Play with NFSP agents.
        if episode % evaluate_every == 0:
            agent.save()  # Save model
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('OSCFR')
def main():
    # Make environment
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'env_num': 4})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'env_num': 4})

    # Set the iteration numbers and how frequently we evaluate the performance
    evaluate_every = 100
    evaluate_num = 10000
    episode_num = 800000

    # The initial memory size
    memory_init_size = 1000

    # Train the agent every X steps
    train_every = 1
    _reward_max = -0.5

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_dqn_result/'

    # Set a global seed
    set_global_seed(0)

    with tf.Session() as sess:
        # Initialize a global step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set up the agents
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         train_every=train_every,
                         state_shape=env.state_shape,
                         mlp_layers=[128, 128])
        # random_agent = RandomAgent(action_num=eval_env.action_num)
        cfr_agent = models.load('leduc-holdem-cfr').agents[0]
        env.set_agents([agent, agent])
        eval_env.set_agents([agent, cfr_agent])

        # Initialize global variables
        sess.run(tf.global_variables_initializer())

        # Init a Logger to plot the learning curve
        logger = Logger(log_dir)

        saver = tf.train.Saver()
        save_dir = 'models/leduc_holdem_dqn'
        saver.restore(sess, os.path.join(save_dir, 'model'))

        for episode in range(episode_num):
            # Generate data from the environment
            trajectories, _ = env.run(is_training=True)

            # Feed transitions into agent memory, and train the agent
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance. Play against the pre-trained CFR agent.
            if episode % evaluate_every == 0:
                _reward = tournament(eval_env, evaluate_num)[0]
                logger.log_performance(episode, _reward)
                if _reward > _reward_max:
                    # Save model
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)
                    saver.save(sess, os.path.join(save_dir, 'model'))
                    _reward_max = _reward

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN')
''' Another example of loading a pre-trained NFSP model on Leduc Hold'em
Here, we directly load the model from the model zoo
'''
import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed, tournament
from rlcard import models

# Make environment
env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set a global seed
set_global_seed(0)

# Here we directly load NFSP models from the /models module
nfsp_agents = models.load('leduc-holdem-nfsp').agents

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
from rlcard import models
from yaniv_env import YanivEnv
from rlcard.agents.random_agent import RandomAgent

config = {}
env = YanivEnv(config)
random_agent = RandomAgent(action_num=env.action_num)

# Register the custom pre-trained model, then load it from the model zoo
models.register('yaniv-dqn', 'pretrained_model:YanivDQNModel')
dqn_agent = models.load('yaniv-dqn').agents[0]

env.set_agents([dqn_agent, random_agent])

print(">> Yaniv pre-trained model")

while (True):
    print(">> Start a new game")
    trajectories, payoffs = env.run(is_training=False)
    final_state = trajectories[0][-1][-2]

    # print('=============== Result ===============')
    # if payoffs[0] > 0:
    #     print('You win {} chips!'.format(payoffs[0]))
    # elif payoffs[0] == 0:
    #     print('It is a tie.')
    # else:
    #     print('You lose {} chips!'.format(-payoffs[0]))
    # print('')
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize the CFR agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()
import rlcard
from rlcard import models
from rlcard.utils import set_global_seed
from rlcard.games.gin_rummy.player import GinRummyPlayer
from rlcard.games.gin_rummy.utils.move import DealHandMove

# Make environment
env = rlcard.make('gin-rummy', config={'seed': 0})
episode_num = 1
env.game.settings.print_settings()

# Set a global seed
set_global_seed(0)

# Set up agents
agents = models.load("gin-rummy-novice-rule").agents  # use novice agents rather than random agents
env.set_agents(agents)

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))

# print move sheet
print("\n========== Move Sheet ==========")
move_sheet = env.game.round.move_sheet
nfsp_agent = load_nfsp_leduc_agent(nfsp_model_path)
print("loaded NFSP leduc agent")
dqn_agent = load_dqn_leduc_agent('models/leduc_holdem_dqn')
print("loaded DQN leduc agent")
dqn_agent_rb = load_dqn_leduc_agent('models/leduc_holdem_dqn_rule_based')
print("loaded DQN RuleBased leduc agent")
cfr_agent = load_cfr_leduc_agent('models/cfr_model')
print("loaded CFR leduc agent")
cfr_agent_rb = load_cfr_leduc_agent('models/cfr_rule_based_model')
print("loaded CFR rule based leduc agent")
rule_based_agent = models.load('leduc-holdem-rule-v1').rule_agents[0]
print("loaded Leduc Rule Based Agent")
random_agent = RandomAgent(action_num=env.action_num)

# agents = [nfsp_agent, dqn_agent, dqn_agent_rb, cfr_agent, cfr_agent_rb, random_agent, rule_based_agent]
agents_dict = {
    "NFSP": nfsp_agent,
    "DQN": dqn_agent,
    "DQN Rule Based": dqn_agent_rb,
    "CFR": cfr_agent,
    "CFR Rule Based": cfr_agent_rb,
    "Random": random_agent,
    "Rule Based": rule_based_agent
}
import rlcard
from rlcard import models
from rlcard.agents import NolimitholdemHumanAgent as HumanAgent
from rlcard.utils import print_card, set_global_seed

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('no-limit-holdem', config={'record_action': True})
human_agent = HumanAgent(action_num=env.action_num)
nfsp_agent = models.load('nolimit-holdem-nfsp').agents[0]
env.set_agents([human_agent, nfsp_agent])

print(">> No Limit Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])
''' A toy example of playing against a pretrained AI on Leduc Hold'em
'''
import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
env = rlcard.make('leduc-holdem')
human_agent = HumanAgent(env.num_actions)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])
''' A toy example of playing against a rule-based bot on UNO
'''
import rlcard
from rlcard import models
from rlcard.agents.uno_human_agent import HumanAgent, _print_action

# Make environment and enable human mode
# Set 'record_action' to True because we need it to print results
env = rlcard.make('uno', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
rule_agent = models.load('uno-rule-v1').agents[0]
env.set_agents([human_agent, rule_agent])

print(">> UNO rule model V1")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])
import rlcard
from rlcard import models
from rlcard.agents import LimitholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('limit-holdem', config={'seed': 0, 'record_action': True})
human_agent = HumanAgent(env.action_num)
dqn_agent = models.load('limit-holdem-dqn').agents[0]
env.set_agents([human_agent, dqn_agent])

print(">> Limit Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    if len(trajectories[0]) != 0:
        final_state = trajectories[0][-1][-2]
        action_record = final_state['action_record']
        state = final_state['raw_obs']
        _action_list = []
        for i in range(1, len(action_record) + 1):
            """
            if action_record[-i][0] == state['current_player']:
                break
            """
            _action_list.insert(0, action_record[-i])
''' A toy example of playing against a pre-trained DQN agent on UNO
'''
import tensorflow as tf

import rlcard
from rlcard import models
from rlcard.agents import DQNAgent
from rlcard.agents.uno_human_agent import HumanAgent, _print_action

# Make environment and enable human mode
# Set 'record_action' to True because we need it to print results
env = rlcard.make('uno', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
dqn_agent = models.load('uno-dqn').agents[0]
env.set_agents([human_agent, dqn_agent])

print(">> UNO dqn")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print the other players' actions
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record) + 1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
''' Another example of loading a pre-trained NFSP model on Leduc Hold'em
Here, we directly load the model from the model zoo
'''
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed, tournament
from rlcard import models

# Make environment
env = rlcard.make('leduc-holdem')

# Set a global seed
set_global_seed(0)

# Here we directly load NFSP models from the /models module
nfsp_agents = models.load('leduc-holdem-nfsp-pytorch').agents

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)