import os

import rlcard
from rlcard.agents import CFRAgent, RandomAgent
from rlcard.utils import set_seed, tournament, Logger, plot_curve


def train(args):
    # Make environments; CFR only supports Leduc Hold'em
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize CFR Agent
    agent = CFRAgent(env, os.path.join(args.log_dir, 'cfr_model'))
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against a random agent
    eval_env.set_agents([agent, RandomAgent(num_actions=env.num_actions)])

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')

            # Evaluate the performance. Play against the random agent.
            if episode % args.evaluate_every == 0:
                agent.save()  # Save model
                logger.log_performance(env.timestep, tournament(eval_env, args.num_eval_games)[0])

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path

    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')
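# A hypothetical command-line wrapper for train(args). The attribute names
# (seed, num_episodes, num_eval_games, evaluate_every, log_dir) are the ones
# the function above actually reads; the parser defaults shown here are
# illustrative assumptions, not values taken from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser('CFR example in RLCard')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--num_episodes', type=int, default=5000)
    parser.add_argument('--num_eval_games', type=int, default=2000)
    parser.add_argument('--evaluate_every', type=int, default=100)
    parser.add_argument('--log_dir', type=str, default='experiments/leduc_holdem_cfr_result/')

    args = parser.parse_args()
    train(args)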
def load_model(model_path, env=None, position=None, device=None):
    if os.path.isfile(model_path):  # Torch model
        import torch
        agent = torch.load(model_path, map_location=device)
        agent.set_device(device)
    elif os.path.isdir(model_path):  # CFR model
        from rlcard.agents import CFRAgent
        agent = CFRAgent(env, model_path)
        agent.load()
    elif model_path == 'random':  # Random model
        from rlcard.agents import RandomAgent
        agent = RandomAgent(num_actions=env.num_actions)
    else:  # A model from the model zoo
        from rlcard import models
        agent = models.load(model_path).agents[position]

    return agent
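# A rough usage sketch for load_model(): one agent restored from a saved CFR
# model directory and one random agent, both attached to an evaluation
# environment. The directory 'experiments/leduc_holdem_cfr_result/cfr_model'
# is a hypothetical example path, not one referenced above.
import rlcard

eval_env = rlcard.make('leduc-holdem', config={'seed': 0})
cfr_agent = load_model('experiments/leduc_holdem_cfr_result/cfr_model', env=eval_env, position=0)
random_agent = load_model('random', env=eval_env, position=1)
eval_env.set_agents([cfr_agent, random_agent])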
class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Hold'em with CFR (chance sampling)
    '''

    def __init__(self):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
        self.agent.load()

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent with step and
            eval_step functioning well.
        '''
        return [self.agent, self.agent]
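# A minimal sketch of consuming LeducHoldemCFRModel through the model zoo.
# It assumes the class is registered under the model id 'leduc-holdem-cfr';
# the registration itself is not shown in the snippet above.
import rlcard
from rlcard import models

env = rlcard.make('leduc-holdem')
model = models.load('leduc-holdem-cfr')  # returns the pretrained Model wrapper
env.set_agents(model.agents)             # the same CFR agent plays both positions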
def train_leduc():
    # Make environments; allow_step_back is required for CFR training
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Set the number of iterations, how often we evaluate performance, and how often we save the model
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 10000

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_cfr_result/'

    # Set a global seed
    set_global_seed(0)

    model_path = 'models/leduc_holdem_cfr'

    # Initialize CFR Agent
    agent = CFRAgent(env, model_path=model_path)
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against a pre-trained NFSP agent
    eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):
        agent.train()
        print('\rIteration {}'.format(episode), end='')

        # Evaluate the performance. Play against the NFSP agent.
        if episode % evaluate_every == 0:
            agent.save()  # Save model
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('CFR')
eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set the number of iterations, how often we evaluate performance, and how often we save the model
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, load it first

# Evaluate CFR against a pre-trained NFSP agent
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play against the NFSP agent.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])
# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/cfr_result/'

# Set a global seed
set_global_seed(0)

# Set up the agents
agents = []
for i in range(env.player_num):
    agent = CFRAgent(env=env)
    agents.append(agent)
random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    for agent in agents:
        agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play against random agents.
episode_num = 100000

# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/cfr_random_result/'

# Set a global seed
set_global_seed(0)

# Set up the agents
agent = CFRAgent(env=env, model_path='./cfr_random_model')
random_agent = RandomAgent(action_num=eval_env.action_num)
env.set_agents([agent, random_agent])
eval_env.set_agents([agent, random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play against random agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model