Code example #1
File: training.py Project: phate09/drl_collab_compet
 brain = env.brains[env.brain_names[0]]
 env_info = env.reset(train_mode=True)[env.brain_names[0]]
 n_agents = len(env_info.agents)
 print('Number of agents:', n_agents)
 action_size = brain.vector_action_space_size
 state_size = brain.vector_observation_space_size
 state_multiplier = brain.num_stacked_vector_observations
 action_type = brain.vector_action_space_type
 comment = f"MADDPG Unity Tennis"
 log_dir = os.path.join('./runs', current_time + '_' + comment)
 os.mkdir(log_dir)
 print(f"logging to {log_dir}")
 writer = SummaryWriter(log_dir=log_dir)
 config = DefaultMunch()
 config.seed = seed
 config.n_episodes = 40000
 config.max_t = 1000
 config.buffer_size = 100000
 config.batch_size = 200
 config.gamma = 0.99
 config.tau = 0.001
 config.lr_actor = 0.0001
 config.lr_critic = 0.0001
 config.n_agents = n_agents
 config.state_size = state_size * state_multiplier
 config.action_size = action_size
 config.learn_start = 10000
 config.max_action = 1  # maximum value allowed for each action
 config.memory = ExperienceReplayMemory(config.buffer_size, seed)
 config.update_every = 2
 config.device = torch.device("cuda:0" if torch.cuda.is_available()
                              else "cpu")  # line truncated in the excerpt; a CUDA-with-CPU-fallback device is assumed
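The excerpt above only builds the training configuration; the training loop itself is not shown. Below is a minimal sketch of how such a config might be consumed, assuming the old Unity ML-Agents API already used in the excerpt, a hypothetical MaddpgAgent class with act()/step() methods, and numpy imported as np. It is an illustration, not code from the repository.

 agent = MaddpgAgent(config)                        # hypothetical agent class consuming the config above
 brain_name = env.brain_names[0]
 for i_episode in range(1, config.n_episodes + 1):
     env_info = env.reset(train_mode=True)[brain_name]
     states = env_info.vector_observations          # one observation row per agent
     scores = np.zeros(config.n_agents)
     for t in range(config.max_t):
         actions = agent.act(states)                # one action vector per agent
         env_info = env.step(actions)[brain_name]
         agent.step(states, actions, env_info.rewards,
                    env_info.vector_observations, env_info.local_done)
         states = env_info.vector_observations
         scores += env_info.rewards
         if np.any(env_info.local_done):
             break
     writer.add_scalar('score/max', np.max(scores), i_episode)  # log episode score to TensorBoard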
Code example #2
 env = UnityEnvironment(file_name=...,  # excerpt starts mid-call; the UnityEnvironment constructor and file_name are assumed, the value is not shown
                        worker_id=worker_id,
                        seed=seed,
                        no_graphics=False)
 brain = env.brains[env.brain_names[0]]
 env_info = env.reset(train_mode=False)[env.brain_names[0]]
 n_agents = len(env_info.agents)
 print('Number of agents:', n_agents)
 action_size = brain.vector_action_space_size
 state_size = brain.vector_observation_space_size
 state_multiplier = brain.num_stacked_vector_observations
 action_type = brain.vector_action_space_type
 comment = f"MADDPG Unity Tennis"
 rand_seed = 0
 config = DefaultMunch()
 config.seed = seed
 config.n_episodes = 10
 config.max_t = 1000
 config.buffer_size = 100000
 config.batch_size = 200
 config.gamma = 0.99
 config.tau = 0.001
 config.lr_actor = 0.0001
 config.lr_critic = 0.001
 config.n_agents = n_agents
 config.state_size = state_size * state_multiplier
 config.action_size = action_size
 config.learn_start = 3000
 config.max_action = 1
 config.memory = ExperienceReplayMemory(config.buffer_size, rand_seed)
 config.update_every = 2
 config.device = device
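Example #2 prepares the same environment and config for evaluation rather than training (train_mode=False, only 10 episodes, no TensorBoard writer). A minimal sketch of such a watch/evaluation loop follows, again assuming a hypothetical MaddpgAgent with a load() method and a noise-free act() flag, plus numpy as np; it is not taken from the repository.

 agent = MaddpgAgent(config)                        # hypothetical agent class
 agent.load('checkpoint.pth')                       # assumed checkpoint path and loading API
 brain_name = env.brain_names[0]
 for i_episode in range(config.n_episodes):
     env_info = env.reset(train_mode=False)[brain_name]
     states = env_info.vector_observations
     scores = np.zeros(config.n_agents)
     while True:
         actions = agent.act(states, add_noise=False)   # assumed flag for deterministic play
         env_info = env.step(actions)[brain_name]
         states = env_info.vector_observations
         scores += env_info.rewards
         if np.any(env_info.local_done):
             break
     print(f'Episode {i_episode + 1}: max score {np.max(scores):.2f}')
 env.close()                                        # shut down the Unity process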