Example #1
# NOTE: apart from util_examples, the import locations below are assumptions
# based on the rlcard3 package layout; the snippet was published without them.
import torch

from rlcard3.agents.dqn_agent_pytorch import DQNAgent
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.games.mocsar.config import Config
from rlcard3.games.mocsar.util_examples import init_environment, init_vars
from rlcard3.utils.utils import set_global_seed

# Config
conf = Config('environ.properties')
# Environment
env, eval_env = init_environment(conf=conf,
                                 env_id='mocsar-cfg',
                                 config={'multi_agent_mode': True})
# Parameter variables read from the config
evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars(
    conf=conf)
# The paths for saving the logs and learning curves
log_dir = './experiments/mocsar_dqn_ra_pytorch_result/'

# Set a global seed
set_global_seed(0)

agent = DQNAgent(scope='dqn',
                 action_num=env.action_num,
                 replay_memory_init_size=memory_init_size,
                 train_every=train_every,
                 state_shape=env.state_shape,
                 mlp_layers=[512, 512],
                 device=torch.device('cuda'))

random_agent = RandomAgent(action_num=eval_env.action_num)

# Other agents: register four rule-based "mocsar_min" agents with the model
env.model.create_agents({"mocsar_min": 4})
# Seats 1-3 get rule agents; the trained DQN agent takes seat 0
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
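The snippet stops before the agent list is used; a minimal continuation, assuming the Env.set_agents/Env.run API that Example #5 demonstrates and the DQNAgent.feed method:

# Sketch: attach the mixed agent list and train the DQN agent at seat 0.
env.set_agents(env_agent_list)

for episode in range(episode_num):
    # Roll out one episode and feed seat 0's transitions to the DQN agent
    trajectories, _ = env.run(is_training=True)
    for ts in trajectories[0]:
        agent.feed(ts)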
Example #2

# NOTE: the imports below are assumptions; this snippet (a multi-process
# Dou Dizhu rollout example) was published without them.
import multiprocessing

import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import assign_task, set_global_seed

# Set the number of worker processes
process_num = 8

# Set episode_num
episode_num = 10000

# Split the episodes evenly across the worker processes
per_tasks = assign_task(episode_num, process_num)

# Set game and make environment
game = 'doudizhu'
env = rlcard3.make(game)

# Set global seed
set_global_seed(1)

# Set up agents: one random agent per player
agent_num = env.player_num
env.set_agents(
    [RandomAgent(action_num=env.action_num) for _ in range(agent_num)])

# Set up a shared list that the worker processes fill with trajectories
manager = multiprocessing.Manager()
trajectories_set = manager.list()

# Generate processes. The published snippet is cut off mid-call; the second
# argument to run_multi and the start/join logic below are a sketch that
# completes it with the shared list defined above.
processes = []
for p in range(process_num):
    process = multiprocessing.Process(target=env.run_multi,
                                      args=(per_tasks[p],
                                            trajectories_set))
    processes.append(process)
    process.start()

for process in processes:
    process.join()
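Once the workers have joined, the shared list holds the generated trajectories; a quick sanity check (a sketch, assuming run_multi appends one entry per completed task):

# Sketch: the exact layout of trajectories_set depends on run_multi;
# this just confirms the workers reported their results.
print('Collected {} trajectory entries'.format(len(trajectories_set)))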
Example #3
def test_set_global_seed(self):
    set_global_seed(0)
    self.assertEqual(np.random.get_state()[1][0], 0)
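The excerpt is a single TestCase method; a self-contained harness around it could look like this (the unittest scaffolding is added here, not part of the original):

import unittest

import numpy as np

from rlcard3.utils.utils import set_global_seed


class TestSetGlobalSeed(unittest.TestCase):

    def test_set_global_seed(self):
        set_global_seed(0)
        # After seeding with 0, the first word of numpy's RNG state equals the seed
        self.assertEqual(np.random.get_state()[1][0], 0)


if __name__ == '__main__':
    unittest.main()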
Example #4
""" A toy example of playing Mocsar with random agents
"""
import tensorflow as tf
from rlcard3.agents.dqn_agent import DQNAgent
import rlcard3
from rlcard3.utils.utils import set_global_seed
from rlcard3.model_agents.registration import get_agents

# Make environment
env = rlcard3.make('mocsar')
episode_num = 2

# Set a global seed
set_global_seed(seed=0)

# Set up agents
agents = {"mocsar_random": 3, "mocsar_min": 1}

# Create DQN agent
# ASSUMPTION: memory_init_size is never defined in the published snippet;
# 1000 is a placeholder value.
memory_init_size = 1000

# Load pretrained model (TensorFlow 1.x session API)
graph = tf.Graph()
sess = tf.Session(graph=graph)

with graph.as_default():
    # The snippet is cut off mid-call; state_shape and mlp_layers below are a
    # sketch that mirrors the PyTorch agent in Example #1.
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])
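The agent dictionary defined above is never attached in the truncated snippet; Example #5 shows the intended call, which would presumably follow here:

# Sketch, mirroring Example #5: resolve the agent dictionary into agent
# objects and attach them to the environment.
env.set_agents(get_agents(agents=agents, nr_players=4))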
""" A toy example of playing Mocsar with random agents
"""

import rlcard3
from rlcard3.utils.utils import set_global_seed
from rlcard3.model_agents.registration import get_agents

# Make environment
env = rlcard3.make('mocsar')
episode_num = 2

# Set a global seed
set_global_seed(seed=0)  # TODO: this does not run yet...

# Set up agents
agents = {"mocsar_random": 2, "mocsar_min": 2}

env.set_agents(get_agents(agents=agents, nr_players=4))

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.
              format(ts[0], ts[1], ts[2], ts[3], ts[4]))
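Since set_global_seed fixes the global RNGs, two seeded runs should produce identical trajectories; a quick check (a sketch, assuming the environment draws all of its randomness from the seeded generators):

# Sketch: re-seed and re-run to confirm the rollout is reproducible.
set_global_seed(seed=0)
first, _ = env.run(is_training=False)
set_global_seed(seed=0)
second, _ = env.run(is_training=False)
assert str(first) == str(second), 'seeded runs diverged'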