brain = env.brains[env.brain_names[0]] env_info = env.reset(train_mode=True)[env.brain_names[0]] n_agents = len(env_info.agents) print('Number of agents:', n_agents) action_size = brain.vector_action_space_size state_size = brain.vector_observation_space_size state_multiplier = brain.num_stacked_vector_observations action_type = brain.vector_action_space_type comment = f"MADDPG Unity Tennis" log_dir = os.path.join('./runs', current_time + '_' + comment) os.mkdir(log_dir) print(f"logging to {log_dir}") writer = SummaryWriter(log_dir=log_dir) config = DefaultMunch() config.seed = seed config.n_episodes = 40000 config.max_t = 1000 config.buffer_size = 100000 config.batch_size = 200 config.gamma = 0.99 config.tau = 0.001 config.lr_actor = 0.0001 config.lr_critic = 0.0001 config.n_agents = n_agents config.state_size = state_size * state_multiplier config.action_size = action_size config.learn_start = 10000 config.max_action = 1 # maximum value allowed for each action config.memory = ExperienceReplayMemory(config.buffer_size, seed) config.update_every = 2 config.device = torch.device(
worker_id=worker_id, seed=seed, no_graphics=False) brain = env.brains[env.brain_names[0]] env_info = env.reset(train_mode=False)[env.brain_names[0]] n_agents = len(env_info.agents) print('Number of agents:', n_agents) action_size = brain.vector_action_space_size state_size = brain.vector_observation_space_size state_multiplier = brain.num_stacked_vector_observations action_type = brain.vector_action_space_type comment = f"MADDPG Unity Tennis" rand_seed = 0 config = DefaultMunch() config.seed = seed config.n_episodes = 10 config.max_t = 1000 config.buffer_size = 100000 config.batch_size = 200 config.gamma = 0.99 config.tau = 0.001 config.lr_actor = 0.0001 config.lr_critic = 0.001 config.n_agents = n_agents config.state_size = state_size * state_multiplier config.action_size = action_size config.learn_start = 3000 config.max_action = 1 config.memory = ExperienceReplayMemory(config.buffer_size, rand_seed) config.update_every = 2 config.device = device