def test_tournament(self):
    """A 1000-game tournament on Leduc Hold'em returns one payoff per player."""
    env = rlcard3.make('leduc-holdem')
    # Two random agents, one per seat.
    agents = [RandomAgent(env.action_num) for _ in range(2)]
    env.set_agents(agents)
    payoffs = tournament(env, 1000)
    self.assertEqual(len(payoffs), 2)
def test_get_legal_actions():
    """Every legal action the mocsar env reports stays within the game's action space."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_get_legal_actions")
    env.set_agents([RandomAgent(action_num=env.action_num) for _ in range(2)])
    env.init_game()
    # Upper bound comes from the game itself, not the env wrapper.
    upper_bound = env.game.get_action_num()
    for action in env._get_legal_actions():
        assert action <= upper_bound
def test_run(self):
    """Single-player blackjack produces exactly one trajectory in eval and training mode."""
    env = rlcard3.make('blackjack')
    env.set_agents([RandomAgent(env.action_num)])
    # Exercise both run modes; each must yield one trajectory list.
    for run_kwargs in ({'is_training': False}, {'is_training': True, 'seed': 1}):
        trajectories, _ = env.run(**run_kwargs)
        self.assertEqual(len(trajectories), 1)
def test_get_legal_actions(self):
    """Legal actions in mahjong are always valid indices into the env's action space."""
    env = rlcard3.make('mahjong')
    agents = [RandomAgent(env.action_num) for _ in range(env.player_num)]
    env.set_agents(agents)
    env.init_game()
    for action in env._get_legal_actions():
        self.assertLessEqual(action, env.action_num - 1)
def test_run(self):
    """No-limit hold'em: two trajectories and zero-sum payoffs."""
    env = rlcard3.make('no-limit-holdem')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 2)
    # The game is zero-sum: the players' payoffs must cancel out.
    self.assertEqual(sum(payoffs), 0)
def test_run():
    """Run mocsar with four random agents and check zero-sum payoffs.

    NOTE(review): the original ended each section with a bare ``yield``,
    which turned this function into a generator — pytest collects generator
    tests but never executes their bodies, so the assertions were silently
    skipped. The yields are removed so the checks actually run.
    """
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_run")
    env.set_agents([RandomAgent(action_num=env.action_num) for _ in range(4)])
    # Evaluation mode: one trajectory per player, payoffs sum to zero.
    trajectories, payoffs = env.run(is_training=False)
    assert len(trajectories) == 4  # There are four players
    assert sum(payoffs) == 0
    # Training mode must also be zero-sum.
    _, payoffs = env.run(is_training=True)
    assert sum(payoffs) == 0
def test_run(self):
    """Mahjong run yields four trajectories and zero-sum payoffs in both modes."""
    env = rlcard3.make('mahjong')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 4)
    self.assertEqual(sum(payoffs), 0)
    # Training mode must be zero-sum as well.
    _, payoffs = env.run(is_training=True, seed=1)
    self.assertEqual(sum(payoffs), 0)
def test_run(self):
    """Doudizhu: winner roles must be consistent with the payoff pattern."""
    env = rlcard3.make('doudizhu')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 3)
    # Collect the ids of every player who won (payoff == 1).
    winners = [pid for pid, payoff in enumerate(payoffs) if payoff == 1]
    # One winner -> the landlord won; two winners -> both peasants won.
    if len(winners) == 1:
        self.assertEqual(env.game.players[winners[0]].role, 'landlord')
    if len(winners) == 2:
        self.assertEqual(env.game.players[winners[0]].role, 'peasant')
        self.assertEqual(env.game.players[winners[1]].role, 'peasant')
def normalize(self, e, num):
    """ Feed random data to normalizer

    Args:
        e (Env): AN Env class
        num (int): The number of steps to be normalized
    """
    print('**********Normalize begin**************')
    begin_step = e.timestep
    # Pass action_num explicitly, as every other RandomAgent(...) call in
    # this codebase does; the original called RandomAgent() with no argument.
    e.set_agents([RandomAgent(action_num=e.action_num) for _ in range(e.player_num)])
    # Keep running episodes until enough environment steps have been fed.
    while e.timestep - begin_step < num:
        trajectories, _ = e.run(is_training=False)
        for tra in trajectories:
            for ts in tra:
                self.agent.feed(ts)
    print('**********Normalize end**************')
''' An example of playing Leduc Hold'em with random agents
'''

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import *

# Make environment
env = rlcard3.make('leduc-holdem')
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents: a single stateless random agent can occupy both seats
agent = RandomAgent(action_num=env.action_num)
env.set_agents([agent, agent])

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectory of the first player
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))
conf=conf)

# The paths for saving the logs and learning curves
log_dir = './experiments/mocsar_dqn_ra_pytorch_result/'

# Set a global seed
set_global_seed(0)

# DQN learner occupying seat 0; network runs on GPU.
# NOTE(review): device is hard-coded to 'cuda' — assumes a GPU is present.
agent = DQNAgent(scope='dqn', action_num=env.action_num, replay_memory_init_size=memory_init_size, train_every=train_every, state_shape=env.state_shape, mlp_layers=[512, 512], device=torch.device('cuda'))
random_agent = RandomAgent(action_num=eval_env.action_num)

# Other agents: rule-based "mocsar_min" opponents fill seats 1-3 in training.
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent: random opponents fill seats 1-3 during evaluation.
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)
episode_num = 10000 # Assign tasks per_tasks = assign_task(episode_num, process_num) # Set game and make environment game = 'doudizhu' env = rlcard3.make(game) # Set global seed set_global_seed(1) # Set up agents agent_num = env.player_num env.set_agents( [RandomAgent(action_num=env.action_num) for _ in range(agent_num)]) # Set a global list to reserve trajectories manager = multiprocessing.Manager() trajectories_set = manager.list() # Generate Processes processes = [] for p in range(process_num): process = multiprocessing.Process(target=env.run_multi, args=(per_tasks[p], trajectories_set)) processes.append(process) # Run process for p in processes:
# Load pretrained model
graph = tf.Graph()
sess = tf.Session(graph=graph)
with graph.as_default():
    # One NFSP agent per player, all sharing the same session and graph.
    nfsp_agents = [
        NFSPAgent(sess,
                  scope='nfsp' + str(i),
                  action_num=env.action_num,
                  state_shape=env.state_shape,
                  hidden_layers_sizes=[128, 128],
                  q_mlp_layers=[128, 128])
        for i in range(env.player_num)
    ]

# We have a pretrained model here. Change the path for your model.
check_point_path = os.path.join(rlcard3.__path__[0], 'models/pretrained/leduc_holdem_nfsp')
with sess.as_default(), graph.as_default():
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(check_point_path))

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
''' A toy example of playing Uno with random agents
'''

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed

# Make environment
env = rlcard3.make('uno')
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents: four independent random agents, one per seat
agents = [RandomAgent(action_num=env.action_num) for _ in range(4)]
env.set_agents(agents)

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectory of the first player
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))