Example #1
 def test_tournament(self):
     env = rlcard3.make('leduc-holdem')
     env.set_agents(
         [RandomAgent(env.action_num),
          RandomAgent(env.action_num)])
     payoffs = tournament(env, 1000)
     self.assertEqual(len(payoffs), 2)
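For context: tournament plays the requested number of games and returns one average payoff per player, which is why the test expects a list of length two. A minimal standalone sketch of the same call (assuming tournament is importable from rlcard3.utils.utils, as the wildcard import in the Leduc walkthrough below suggests):

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import tournament

env = rlcard3.make('leduc-holdem')
env.set_agents([RandomAgent(env.action_num), RandomAgent(env.action_num)])
# One averaged payoff per player; close to zero for two identical random agents
print(tournament(env, 1000))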
Example #2
def test_get_legal_actions():
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_get_legal_actions")
    env.set_agents([
        RandomAgent(action_num=env.action_num),
        RandomAgent(action_num=env.action_num)
    ])
    env.init_game()
    legal_actions = env._get_legal_actions()
    for legal_action in legal_actions:
        assert legal_action <= env.game.get_action_num()
Example #3
 def test_run(self):
     env = rlcard3.make('blackjack')
     env.set_agents([RandomAgent(env.action_num)])
     trajectories, _ = env.run(is_training=False)
     self.assertEqual(len(trajectories), 1)
     trajectories, _ = env.run(is_training=True, seed=1)
     self.assertEqual(len(trajectories), 1)
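Each entry in trajectories is one player's list of transitions, and a transition is the 5-tuple (state, action, reward, next_state, done) that the walkthrough scripts below print field by field. A hedged sketch of unpacking the first transition (assuming the episode recorded at least one):

state, action, reward, next_state, done = trajectories[0][0]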
Example #4
 def test_get_legal_actions(self):
     env = rlcard3.make('mahjong')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     env.init_game()
     legal_actions = env._get_legal_actions()
     for legal_action in legal_actions:
         self.assertLessEqual(legal_action, env.action_num - 1)
Example #5
 def test_run(self):
     env = rlcard3.make('no-limit-holdem')
     agents = [RandomAgent(env.action_num) for _ in range(env.player_num)]
     env.set_agents(agents)
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 2)
     total = 0
     for payoff in payoffs:
         total += payoff
     self.assertEqual(total, 0)
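The manual summation verifies that the game is zero-sum: whatever one player wins, the other loses. An equivalent, slightly more idiomatic form (an alternative phrasing, not from the original test) that also tolerates floating-point payoffs:

self.assertAlmostEqual(sum(payoffs), 0)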
Example #6
def test_run():
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_run")
    env.set_agents([
        RandomAgent(action_num=env.action_num),
        RandomAgent(action_num=env.action_num),
        RandomAgent(action_num=env.action_num),
        RandomAgent(action_num=env.action_num)
    ])
    trajectories, payoffs = env.run(is_training=False)
    assert len(trajectories) == 4  # There are four players
    total = 0
    for payoff in payoffs:
        total += payoff
    assert total == 0

    trajectories, payoffs = env.run(is_training=True)
    total = 0
    for payoff in payoffs:
        total += payoff
    assert total == 0
Example #7
 def test_run(self):
     env = rlcard3.make('mahjong')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 4)
     total = 0
     for payoff in payoffs:
         total += payoff
     self.assertEqual(total, 0)
     trajectories, payoffs = env.run(is_training=True, seed=1)
     total = 0
     for payoff in payoffs:
         total += payoff
     self.assertEqual(total, 0)
Example #8
 def test_run(self):
     env = rlcard3.make('doudizhu')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 3)
     win = []
     for player_id, payoff in enumerate(payoffs):
         if payoff == 1:
             win.append(player_id)
     if len(win) == 1:
         self.assertEqual(env.game.players[win[0]].role, 'landlord')
     if len(win) == 2:
         self.assertEqual(env.game.players[win[0]].role, 'peasant')
         self.assertEqual(env.game.players[win[1]].role, 'peasant')
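The role checks encode the Dou Dizhu win condition: one landlord plays against two peasants, so a single winner must be the landlord and two winners must both be peasants. A hedged helper expressing the same invariant (the helper name is hypothetical):

def check_winner_roles(env, payoffs):
    winners = [pid for pid, payoff in enumerate(payoffs) if payoff == 1]
    roles = {env.game.players[pid].role for pid in winners}
    if len(winners) == 1:
        assert roles == {'landlord'}
    elif len(winners) == 2:
        assert roles == {'peasant'}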
Example #9
    def normalize(self, e, num):
        """ Feed random data to normalizer

        Args:
            e (Env): AN Env class

            num (int): The number of steps to be normalized

        """
        print('**********Normalize begin**************')
        begin_step = e.timestep
        e.set_agents([RandomAgent(action_num=e.action_num) for _ in range(e.player_num)])
        while e.timestep - begin_step < num:
            trajectories, _ = e.run(is_training=False)

            for tra in trajectories:
                for ts in tra:
                    self.agent.feed(ts)
        print('**********Normalize end**************')
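A hedged sketch of how such a warm-up might be invoked, assuming a trainer object that owns self.agent (the enclosing class is not shown in the snippet; trainer is a hypothetical name):

# Hypothetical usage: feed 1000 random timesteps through the agent's normalizer
trainer.normalize(rlcard3.make('leduc-holdem'), 1000)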
Example #10
''' An example of playing Leduc Hold'em with random agents
'''

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import *

# Make environment
env = rlcard3.make('leduc-holdem')
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents
agent = RandomAgent(action_num=env.action_num)
env.set_agents([agent, agent])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.
              format(ts[0], ts[1], ts[2], ts[3], ts[4]))
Example #11
# Make environments (the creation calls were truncated in the original;
# mocsar is implied by the rest of the snippet, which trains a DQN agent
# against mocsar rule agents)
env = rlcard3.make('mocsar', conf=conf)
eval_env = rlcard3.make('mocsar', conf=conf)
# The paths for saving the logs and learning curves
log_dir = './experiments/mocsar_dqn_ra_pytorch_result/'

# Set a global seed
set_global_seed(0)

agent = DQNAgent(scope='dqn',
                 action_num=env.action_num,
                 replay_memory_init_size=memory_init_size,
                 train_every=train_every,
                 state_shape=env.state_shape,
                 mlp_layers=[512, 512],
                 device=torch.device('cuda'))

random_agent = RandomAgent(action_num=eval_env.action_num)

# Other agents
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)
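The snippet stops after creating the logger; in rlcard-style examples it is typically followed by a train-and-evaluate loop of roughly the shape below (a sketch under that assumption; episode_num, evaluate_every and evaluate_num are hypothetical names not defined in this snippet):

for episode in range(episode_num):
    # Generate training data and feed it to the DQN agent
    trajectories, _ = env.run(is_training=True)
    for ts in trajectories[0]:
        agent.feed(ts)
    # Periodically evaluate against the rule-agent line-up
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])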
Example #12
    episode_num = 10000

    # Assign tasks
    per_tasks = assign_task(episode_num, process_num)

    # Set game and make environment
    game = 'doudizhu'
    env = rlcard3.make(game)

    # Set global seed
    set_global_seed(1)

    # Set up agents
    agent_num = env.player_num
    env.set_agents(
        [RandomAgent(action_num=env.action_num) for _ in range(agent_num)])

    # Set a global list to reserve trajectories
    manager = multiprocessing.Manager()
    trajectories_set = manager.list()

    # Generate Processes
    processes = []
    for p in range(process_num):
        process = multiprocessing.Process(target=env.run_multi,
                                          args=(per_tasks[p],
                                                trajectories_set))
        processes.append(process)

    # Run process
    for p in processes:
        p.start()
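The original snippet is truncated at the start-up loop; a typical continuation (an assumption, not recovered text) joins the workers and then reads the shared list:

    # Wait for every worker, then inspect the shared trajectory list
    for p in processes:
        p.join()
    print('Collected {} trajectory sets'.format(len(trajectories_set)))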
Example #13
# Load pretrained model
graph = tf.Graph()
sess = tf.Session(graph=graph)

with graph.as_default():
    nfsp_agents = []
    for i in range(env.player_num):
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
                          hidden_layers_sizes=[128,128],
                          q_mlp_layers=[128,128])
        nfsp_agents.append(agent)

# We have a pretrained model here. Change the path for your model.
check_point_path = os.path.join(rlcard3.__path__[0], 'models/pretrained/leduc_holdem_nfsp')

with sess.as_default():
    with graph.as_default():
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(check_point_path))

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
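Since payoffs in Leduc Hold'em depend on seat order, a natural follow-up (not part of the original example) is to evaluate the pretrained agent from the other seat as well:

# Swap seats and read the payoff at index 1, where the NFSP agent now sits
env.set_agents([random_agent, nfsp_agents[0]])
print('Average reward in the second seat: ', tournament(env, evaluate_num)[1])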

Example #14
''' A toy example of playing Uno with random agents
'''

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed

# Make environment
env = rlcard3.make('uno')
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
agent_1 = RandomAgent(action_num=env.action_num)
agent_2 = RandomAgent(action_num=env.action_num)
agent_3 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0, agent_1, agent_2, agent_3])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.
              format(ts[0], ts[1], ts[2], ts[3], ts[4]))