from rlcard3.games.mocsar.util_examples import init_environment, init_vars

# Load run configuration from the properties file
conf = Config('environ.properties')

# Training and evaluation environments (multi-agent mode)
env, eval_env = init_environment(
    conf=conf,
    env_id='mocsar-cfg',
    config={'multi_agent_mode': True},
)

# Hyper-parameters taken from the configuration
evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = \
    init_vars(conf=conf)

# Destination for logs and learning curves
log_dir = './experiments/mocsar_dqn_ra_pytorch_result/'

# Make the run reproducible
set_global_seed(0)

# The PyTorch DQN learner (runs on CUDA)
agent = DQNAgent(
    scope='dqn',
    action_num=env.action_num,
    replay_memory_init_size=memory_init_size,
    train_every=train_every,
    state_shape=env.state_shape,
    mlp_layers=[512, 512],
    device=torch.device('cuda'),
)
random_agent = RandomAgent(action_num=eval_env.action_num)

# Rule-based opponents fill seats 1-3; the DQN agent takes seat 0
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[seat] for seat in range(1, 4)]
env_agent_list.insert(0, agent)
# Set the number of process process_num = 8 # Set episode_num episode_num = 10000 # Assign tasks per_tasks = assign_task(episode_num, process_num) # Set game and make environment game = 'doudizhu' env = rlcard3.make(game) # Set global seed set_global_seed(1) # Set up agents agent_num = env.player_num env.set_agents( [RandomAgent(action_num=env.action_num) for _ in range(agent_num)]) # Set a global list to reserve trajectories manager = multiprocessing.Manager() trajectories_set = manager.list() # Generate Processes processes = [] for p in range(process_num): process = multiprocessing.Process(target=env.run_multi, args=(per_tasks[p],
def test_set_global_seed(self):
    """Seeding with 0 must leave numpy's RNG in the expected reproducible state."""
    set_global_seed(0)
    # First element of the Mersenne Twister key array is 0 after seeding with 0.
    first_key_entry = np.random.get_state()[1][0]
    self.assertEqual(first_key_entry, 0)
""" A toy example of playing Mocsar with random agents """ import tensorflow as tf from rlcard3.agents.dqn_agent import DQNAgent import rlcard3 from rlcard3.utils.utils import set_global_seed from rlcard3.model_agents.registration import get_agents # Make environment env = rlcard3.make('mocsar') episode_num = 2 # Set a global seed set_global_seed(seed=0) # Set up agents agents = {"mocsar_random": 3, "mocsar_min": 1} # Create DQN agent # Set a global seed set_global_seed(0) # Load pretrained model graph = tf.Graph() sess = tf.Session(graph=graph) with graph.as_default(): agent = DQNAgent(sess, scope='dqn', action_num=env.action_num, replay_memory_init_size=memory_init_size,
"""
A toy example of playing Mocsar with random agents.
"""
import rlcard3
from rlcard3.utils.utils import set_global_seed
from rlcard3.model_agents.registration import get_agents

# Build the environment
env = rlcard3.make('mocsar')
episode_num = 2

# Seed everything for reproducibility
set_global_seed(seed=0)

# TODO: this does not run yet...
# Two random agents versus two "min" rule agents
agents = {"mocsar_random": 2, "mocsar_min": 2}
env.set_agents(get_agents(agents=agents, nr_players=4))

for episode in range(episode_num):
    # Roll out one game without training
    trajectories, _ = env.run(is_training=False)

    # Dump player 0's transitions for inspection
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.
              format(ts[0], ts[1], ts[2], ts[3], ts[4]))