Example 1
 def setup_class(self):
     """Init Wrapper with MT10."""
     # pylint: disable=import-outside-toplevel
     from metaworld.benchmarks import MT10
     tasks = MT10.get_train_tasks().all_task_names
     envs = []
     for task in tasks:
         envs.append(MT10.from_task(task))
     self.env = MultiEnvWrapper(envs,
                                sample_strategy=round_robin_strategy,
                                metaworld_mt=True)
Example 2
 def setup_class(self):
     """Init Wrapper with MT10."""
     # pylint: disable=import-outside-toplevel
     from metaworld.benchmarks import MT10
     tasks = MT10.get_train_tasks().all_task_names
     envs = []
     for task in tasks:
         envs.append(GymEnv(MT10.from_task(task)))
     self.task_names = tasks
     self.env = MultiEnvWrapper(envs,
                                sample_strategy=round_robin_strategy,
                                mode='vanilla',
                                env_names=tasks)
     self.env_no_onehot = MultiEnvWrapper(
         envs, sample_strategy=round_robin_strategy, mode='del-onehot')
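For context, a hypothetical follow-up test (not part of the snippet above) can make the difference between the two wrappers explicit. The sketch assumes the newer garage Environment API, in which reset() returns an (observation, episode_info) pair, and that 'del-onehot' removes one observation entry per wrapped task:

 def test_del_onehot_strips_task_id(self):
     """Hypothetical check: 'del-onehot' drops the one-hot task ID."""
     obs, _ = self.env.reset()                     # 'vanilla': obs kept as-is
     obs_stripped, _ = self.env_no_onehot.reset()  # 'del-onehot': one-hot removed
     # The stripped observation should be shorter by one entry per task.
     assert len(obs) == len(obs_stripped) + len(self.task_names)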
Example 3
def test_all_mt10():
    mt10_env = MT10()
    tasks = mt10_env.sample_tasks(11)
    for t in tasks:
        mt10_env.set_task(t)
        step_env(mt10_env, max_path_length=3)

    mt10_env.close()
    del mt10_env
Example 4
def test_augment_observation():
    """Test that the last 40 elements of obs array are sliced off correctly."""
    env = MT10()
    for i in range(env.num_tasks):
        env.set_task(i)
        obs, _, _, _ = env.step(env.action_space.sample())
        assert obs[-10:][i] == 1
        obs = env.reset()
        assert obs[-10:][i] == 1
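The same one-hot layout can be used outside of a test, for example to recover the active task index from a raw MT10 observation. A minimal sketch (the helper name is hypothetical), assuming the 10-dimensional one-hot suffix verified above:

import numpy as np


def split_task_onehot(obs, num_tasks=10):
    """Split an MT10-style observation into (plain_obs, task_index).

    Assumes the last `num_tasks` entries are the one-hot task ID, as the
    test above checks for MT10.
    """
    onehot = np.asarray(obs[-num_tasks:])
    return obs[:-num_tasks], int(np.argmax(onehot))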
Example 5
def MT10HelperEnv():
    return MTEnv(MT10.get_train_tasks())
Example 6
    default=1,
    metavar='N',
    help='number of gradient updates between target value network updates (default: 1)')
parser.add_argument('--replay_size',
                    type=int,
                    default=1000000,
                    metavar='N',
                    help='size of replay buffer (default: 1000000)')
parser.add_argument('--cuda',
                    action="store_true",
                    help='run on CUDA (default: False)')
args = parser.parse_args()

# Environment
# env = NormalizedActions(gym.make(args.env_name))
env = MT10.get_train_tasks()
#env = gym.make(args.env_name)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

# Tensorboard
writer = SummaryWriter('runs/{}_SAC_batch_{}_{}_{}'.format(
    datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), args.env_name,
    args.policy, "autotune" if args.automatic_entropy_tuning else ""))

# Memory
memory = ReplayMemory(args.replay_size)
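After building the agent and replay memory, a script of this shape typically continues with a standard off-policy training loop. The sketch below is not part of the excerpt; it assumes the usual interface of this style of SAC implementation (agent.select_action(state), memory.push(...), agent.update_parameters(memory, batch_size, updates)) and uses placeholder values for the episode budget, horizon, and batch size:

# Training loop sketch (hypothetical continuation of the script above).
batch_size = 256   # placeholder; the full script presumably parses --batch_size
updates = 0
for episode in range(1000):            # placeholder episode budget
    state = env.reset()
    episode_reward = 0.0
    for step in range(150):            # placeholder per-episode horizon
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        episode_reward += reward
        # Store the transition; the mask marks non-terminal steps.
        memory.push(state, action, reward, next_state, float(not done))
        state = next_state
        if len(memory) > batch_size:
            agent.update_parameters(memory, batch_size, updates)
            updates += 1
        if done:
            break
    writer.add_scalar('reward/train', episode_reward, episode)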
Example 7
def te_ppo_mt10(ctxt, seed, n_epochs, batch_size_per_task):
    """Train Task Embedding PPO with PointEnv.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalTFRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        n_epochs (int): Total number of epochs for training.
        batch_size_per_task (int): Batch size of samples for each task.

    """
    set_seed(seed)
    tasks = MT10.get_train_tasks().all_task_names
    envs = [normalize(GarageEnv(MT10.from_task(task))) for task in tasks]
    env = MultiEnvWrapper(envs,
                          sample_strategy=round_robin_strategy,
                          mode='del-onehot')

    latent_length = 4
    inference_window = 6
    batch_size = batch_size_per_task * len(tasks)
    policy_ent_coeff = 2e-2
    encoder_ent_coeff = 2e-4
    inference_ce_coeff = 5e-2
    max_episode_length = 100
    embedding_init_std = 0.1
    embedding_max_std = 0.2
    embedding_min_std = 1e-6
    policy_init_std = 1.0
    policy_max_std = None
    policy_min_std = None

    with LocalTFRunner(snapshot_config=ctxt) as runner:

        task_embed_spec = TEPPO.get_encoder_spec(env.task_space,
                                                 latent_dim=latent_length)

        task_encoder = GaussianMLPEncoder(
            name='embedding',
            embedding_spec=task_embed_spec,
            hidden_sizes=(20, 20),
            std_share_network=True,
            init_std=embedding_init_std,
            max_std=embedding_max_std,
            output_nonlinearity=tf.nn.tanh,
            min_std=embedding_min_std,
        )

        traj_embed_spec = TEPPO.get_infer_spec(
            env.spec,
            latent_dim=latent_length,
            inference_window_size=inference_window)

        inference = GaussianMLPEncoder(
            name='inference',
            embedding_spec=traj_embed_spec,
            hidden_sizes=(20, 10),
            std_share_network=True,
            init_std=2.0,
            output_nonlinearity=tf.nn.tanh,
            min_std=embedding_min_std,
        )

        policy = GaussianMLPTaskEmbeddingPolicy(
            name='policy',
            env_spec=env.spec,
            encoder=task_encoder,
            hidden_sizes=(32, 16),
            std_share_network=True,
            max_std=policy_max_std,
            init_std=policy_init_std,
            min_std=policy_min_std,
        )

        baseline = LinearMultiFeatureBaseline(
            env_spec=env.spec, features=['observations', 'tasks', 'latents'])

        algo = TEPPO(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     inference=inference,
                     max_episode_length=max_episode_length,
                     discount=0.99,
                     lr_clip_range=0.2,
                     policy_ent_coeff=policy_ent_coeff,
                     encoder_ent_coeff=encoder_ent_coeff,
                     inference_ce_coeff=inference_ce_coeff,
                     use_softplus_entropy=True,
                     optimizer_args=dict(
                         batch_size=32,
                         max_episode_length=10,
                         learning_rate=1e-3,
                     ),
                     inference_optimizer_args=dict(
                         batch_size=32,
                         max_episode_length=10,
                     ),
                     center_adv=True,
                     stop_ce_gradient=True)

        runner.setup(algo,
                     env,
                     sampler_cls=LocalSampler,
                     sampler_args=None,
                     worker_class=TaskEmbeddingWorker)
        runner.train(n_epochs=n_epochs, batch_size=batch_size, plot=False)
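A launcher function in this form is normally run through garage's experiment wrapper, which supplies the ctxt argument. A minimal, hypothetical entry point (the hyperparameter values are placeholders, not the ones from the original experiment):

from garage import wrap_experiment


@wrap_experiment
def launch_te_ppo_mt10(ctxt=None):
    """Hypothetical wrapper that forwards the snapshot config to te_ppo_mt10."""
    te_ppo_mt10(ctxt, seed=1, n_epochs=600, batch_size_per_task=1024)


if __name__ == '__main__':
    launch_te_ppo_mt10()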
Example 8
def test_random_init():
    """Test that random_init == False for all envs."""
    env = MT10()
    assert len(env._task_envs) == 10
    for task_env in env._task_envs:
        assert not task_env.random_init
Example 9
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        elif env_id.startswith('metaworld_'):
            world_bench = env_id.split('_')[1]
            if world_bench.startswith('ml1.'):
                world_task = world_bench.split('.')[1]
                env = ML1.get_train_tasks(world_task)
            elif world_bench == 'ml10':
                env = ML10.get_train_tasks()
            elif world_bench == 'mt10':
                env = MT10.get_train_tasks()
            else:
                raise NotImplementedError(
                    'This code only supports metaworld ml1, ml10 or mt10.')

            env = MetaworldWrapper(env)
        else:
            env = gym.make(env_id)

        if obs_keys is not None:
            env = gym.wrappers.FlattenDictWrapper(env, dict_keys=obs_keys)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        # if str(env.__class__.__name__).find('TimeLimit') >= 0:
        #     env = TimeLimitMask(env)

        if log_dir is not None:
            if save_video:
                env = bench.Monitor(env,
                                    os.path.join(log_dir + '/eval/monitor',
                                                 str(rank)),
                                    allow_early_resets=allow_early_resets)

                env = gym.wrappers.Monitor(env,
                                           os.path.join(
                                               log_dir + '/eval/video',
                                               str(rank)),
                                           force=True)
            else:
                env = bench.Monitor(env,
                                    os.path.join(log_dir + '/monitor',
                                                 str(rank)),
                                    allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
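The _thunk closure above is returned by its enclosing factory and is normally consumed by a vectorized-environment wrapper. A sketch of that wiring, assuming a baselines-style SubprocVecEnv/DummyVecEnv that accepts a list of such callables and a make_env factory with the (hypothetical) signature shown:

from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv


def make_vec_envs(env_id, seed, num_processes, log_dir, allow_early_resets):
    """Hypothetical helper: build one thunk per worker and vectorize them."""
    thunks = [
        make_env(env_id, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]
    # Run each environment in its own process when there is more than one.
    return SubprocVecEnv(thunks) if num_processes > 1 else DummyVecEnv(thunks)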