def setup_class(self):
    """Init Wrapper with MT10."""
    # pylint: disable=import-outside-toplevel
    from metaworld.benchmarks import MT10
    tasks = MT10.get_train_tasks().all_task_names
    envs = []
    for task in tasks:
        envs.append(MT10.from_task(task))
    self.env = MultiEnvWrapper(envs,
                               sample_strategy=round_robin_strategy,
                               metaworld_mt=True)
def setup_class(self):
    """Init Wrapper with MT10."""
    # pylint: disable=import-outside-toplevel
    from metaworld.benchmarks import MT10
    tasks = MT10.get_train_tasks().all_task_names
    envs = []
    for task in tasks:
        envs.append(GymEnv(MT10.from_task(task)))
    self.task_names = tasks
    self.env = MultiEnvWrapper(envs,
                               sample_strategy=round_robin_strategy,
                               mode='vanilla',
                               env_names=tasks)
    self.env_no_onehot = MultiEnvWrapper(envs,
                                         sample_strategy=round_robin_strategy,
                                         mode='del-onehot')
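A minimal sketch of what the two wrapper modes above imply for observation shapes, assuming 'vanilla' keeps the one-hot task ID that MT10 already appends while 'del-onehot' strips it, and that `reset()` returns the observation array directly; the helper name is illustrative, not part of garage:

def check_onehot_modes(env_vanilla, env_del_onehot, num_tasks=10):
    """Illustrative check: 'vanilla' observations carry a one-hot task ID
    of length `num_tasks`; 'del-onehot' observations do not."""
    obs_vanilla = env_vanilla.reset()
    obs_plain = env_del_onehot.reset()
    assert len(obs_vanilla) == len(obs_plain) + num_tasks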
def test_all_mt10():
    mt10_env = MT10()
    tasks = mt10_env.sample_tasks(11)
    for t in tasks:
        mt10_env.set_task(t)
        step_env(mt10_env, max_path_length=3)
    mt10_env.close()
    del mt10_env
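`step_env` is not defined in this snippet; a plausible minimal definition, assuming it simply resets the environment and random-steps it for at most `max_path_length` steps:

def step_env(env, max_path_length=100):
    # Assumed helper (not shown in the original test file): reset, then
    # take random actions until `done` or the step budget is exhausted.
    env.reset()
    for _ in range(max_path_length):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break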
def test_augment_observation():
    """Test that the one-hot task ID fills the last 10 elements of the obs array."""
    env = MT10()
    for i in range(env.num_tasks):
        env.set_task(i)
        obs, _, _, _ = env.step(env.action_space.sample())
        assert obs[-10:][i] == 1
        obs = env.reset()
        assert obs[-10:][i] == 1
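The test relies on MT10 appending a one-hot task ID to each observation. A hypothetical helper (`split_augmented_obs` is not part of metaworld) that recovers the base observation and the active task index:

import numpy as np

def split_augmented_obs(obs, num_tasks=10):
    # Hypothetical helper: MT10 observations end with a one-hot task ID of
    # length `num_tasks`; split it off and decode the active task index.
    base_obs, one_hot = obs[:-num_tasks], obs[-num_tasks:]
    return base_obs, int(np.argmax(one_hot))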
def MT10HelperEnv():
    return MTEnv(MT10.get_train_tasks())
                    default=1,
                    metavar='N',
                    help='Value target update per no. of updates per step (default: 1)')
parser.add_argument('--replay_size',
                    type=int,
                    default=1000000,
                    metavar='N',
                    help='size of replay buffer (default: 1000000)')
parser.add_argument('--cuda',
                    action="store_true",
                    help='run on CUDA (default: False)')
args = parser.parse_args()

# Environment
# env = NormalizedActions(gym.make(args.env_name))
env = MT10.get_train_tasks()
# env = gym.make(args.env_name)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

# Tensorboard
writer = SummaryWriter('runs/{}_SAC_batch_{}_{}_{}'.format(
    datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), args.env_name,
    args.policy, "autotune" if args.automatic_entropy_tuning else ""))

# Memory
memory = ReplayMemory(args.replay_size)
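The setup above stops after creating the replay memory. A sketch of the interaction loop that typically follows in this style of SAC script, assuming the common interface `agent.select_action(state)`, `agent.update_parameters(memory, batch_size, updates)`, `memory.push(...)`, and flags like `args.num_steps`; treat these names as assumptions about the surrounding code:

import itertools

total_numsteps = 0
updates = 0
for _ in itertools.count(1):
    state = env.reset()
    done = False
    while not done:
        action = agent.select_action(state)  # sample action from the policy
        next_state, reward, done, _ = env.step(action)
        # Store the transition; the mask is 0 when the episode terminated.
        memory.push(state, action, reward, next_state, float(not done))
        state = next_state
        total_numsteps += 1
        if len(memory) > args.batch_size:
            agent.update_parameters(memory, args.batch_size, updates)
            updates += 1
    if total_numsteps > args.num_steps:
        break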
def te_ppo_mt10(ctxt, seed, n_epochs, batch_size_per_task):
    """Train Task Embedding PPO with MT10.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        n_epochs (int): Total number of epochs for training.
        batch_size_per_task (int): Batch size of samples for each task.

    """
    set_seed(seed)
    tasks = MT10.get_train_tasks().all_task_names
    envs = [normalize(GarageEnv(MT10.from_task(task))) for task in tasks]
    env = MultiEnvWrapper(envs,
                          sample_strategy=round_robin_strategy,
                          mode='del-onehot')

    latent_length = 4
    inference_window = 6
    batch_size = batch_size_per_task * len(tasks)
    policy_ent_coeff = 2e-2
    encoder_ent_coeff = 2e-4
    inference_ce_coeff = 5e-2
    max_episode_length = 100
    embedding_init_std = 0.1
    embedding_max_std = 0.2
    embedding_min_std = 1e-6
    policy_init_std = 1.0
    policy_max_std = None
    policy_min_std = None

    with LocalTFRunner(snapshot_config=ctxt) as runner:
        task_embed_spec = TEPPO.get_encoder_spec(env.task_space,
                                                 latent_dim=latent_length)

        task_encoder = GaussianMLPEncoder(
            name='embedding',
            embedding_spec=task_embed_spec,
            hidden_sizes=(20, 20),
            std_share_network=True,
            init_std=embedding_init_std,
            max_std=embedding_max_std,
            output_nonlinearity=tf.nn.tanh,
            min_std=embedding_min_std,
        )

        traj_embed_spec = TEPPO.get_infer_spec(
            env.spec,
            latent_dim=latent_length,
            inference_window_size=inference_window)

        inference = GaussianMLPEncoder(
            name='inference',
            embedding_spec=traj_embed_spec,
            hidden_sizes=(20, 10),
            std_share_network=True,
            init_std=2.0,
            output_nonlinearity=tf.nn.tanh,
            min_std=embedding_min_std,
        )

        policy = GaussianMLPTaskEmbeddingPolicy(
            name='policy',
            env_spec=env.spec,
            encoder=task_encoder,
            hidden_sizes=(32, 16),
            std_share_network=True,
            max_std=policy_max_std,
            init_std=policy_init_std,
            min_std=policy_min_std,
        )

        baseline = LinearMultiFeatureBaseline(
            env_spec=env.spec, features=['observations', 'tasks', 'latents'])

        algo = TEPPO(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     inference=inference,
                     max_episode_length=max_episode_length,
                     discount=0.99,
                     lr_clip_range=0.2,
                     policy_ent_coeff=policy_ent_coeff,
                     encoder_ent_coeff=encoder_ent_coeff,
                     inference_ce_coeff=inference_ce_coeff,
                     use_softplus_entropy=True,
                     optimizer_args=dict(
                         batch_size=32,
                         max_episode_length=10,
                         learning_rate=1e-3,
                     ),
                     inference_optimizer_args=dict(
                         batch_size=32,
                         max_episode_length=10,
                     ),
                     center_adv=True,
                     stop_ce_gradient=True)

        runner.setup(algo,
                     env,
                     sampler_cls=LocalSampler,
                     sampler_args=None,
                     worker_class=TaskEmbeddingWorker)
        runner.train(n_epochs=n_epochs, batch_size=batch_size, plot=False)
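In garage, an experiment function like `te_ppo_mt10` normally receives its `ctxt` from the `wrap_experiment` decorator rather than from the caller. A minimal launcher sketch; the hyperparameter values here are illustrative, not taken from the original script:

from garage import wrap_experiment

if __name__ == '__main__':
    # wrap_experiment supplies the `ctxt` argument when the function runs.
    experiment = wrap_experiment(te_ppo_mt10)
    experiment(seed=1, n_epochs=600, batch_size_per_task=1024)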
def test_random_init():
    """Test that random_init == False for all envs."""
    env = MT10()
    assert len(env._task_envs) == 10
    for task_env in env._task_envs:
        assert not task_env.random_init
def _thunk():
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    elif env_id.startswith('metaworld_'):
        world_bench = env_id.split('_')[1]
        if world_bench.startswith('ml1.'):
            world_task = world_bench.split('.')[1]
            env = ML1.get_train_tasks(world_task)
        elif world_bench == 'ml10':
            env = ML10.get_train_tasks()
        elif world_bench == 'mt10':
            env = MT10.get_train_tasks()
        else:
            # `raise` with a bare string is invalid in Python 3; raise a
            # proper exception instead.
            raise ValueError(
                'This code only supports metaworld ml1, ml10 or mt10.')
        env = MetaworldWrapper(env)
    else:
        env = gym.make(env_id)

    if obs_keys is not None:
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=obs_keys)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)

    env.seed(seed + rank)

    obs_shape = env.observation_space.shape

    # if str(env.__class__.__name__).find('TimeLimit') >= 0:
    #     env = TimeLimitMask(env)

    if log_dir is not None:
        if save_video:
            env = bench.Monitor(env,
                                os.path.join(log_dir + '/eval/monitor',
                                             str(rank)),
                                allow_early_resets=allow_early_resets)
            env = gym.wrappers.Monitor(env,
                                       os.path.join(log_dir + '/eval/video',
                                                    str(rank)),
                                       force=True)
        else:
            env = bench.Monitor(env,
                                os.path.join(log_dir + '/monitor', str(rank)),
                                allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])

    return env
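`_thunk` is the inner closure of an environment factory. A sketch of how such factories are usually consumed, assuming an enclosing `make_env(env_id, seed, rank, log_dir, ...)` that returns `_thunk`; the factory signature and the `SubprocVecEnv` import follow the common baselines-style layout and are assumptions here:

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_vec_envs(env_id, seed, num_processes, log_dir=None):
    # One thunk per worker rank; SubprocVecEnv runs each env in its own
    # subprocess and steps them in lockstep.
    thunks = [make_env(env_id, seed, rank, log_dir)
              for rank in range(num_processes)]
    return SubprocVecEnv(thunks)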