Exemple #1
0
    def test_is_pickleable(self, obs_dim, embedding_dim):
        """Encoder output distribution must survive a pickle round-trip.

        Builds a GaussianMLPEncoder, perturbs one internal bias variable so
        outputs differ from freshly-initialized values, then checks that an
        unpickled copy produces identical loc/stddev outputs in a new graph.
        """
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)

        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim  # NOTE(review): rebinds the parameter; value appears unused below

        # Reach into the encoder's variable scope to grab an internal bias.
        with tf.compat.v1.variable_scope('GaussianMLPEncoder/GaussianMLPModel',
                                         reuse=True):
            bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/hidden_0/bias')
        # assign it to all one
        bias.load(tf.ones_like(bias).eval())
        output1 = self.sess.run(
            [embedding.distribution.loc,
             embedding.distribution.stddev()],
            feed_dict={embedding.model.input: [[obs.flatten()]]})

        # Unpickle into a brand-new graph/session; outputs must match,
        # proving parameter values were serialized with the encoder.
        p = pickle.dumps(embedding)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            embedding_pickled = pickle.loads(p)

            output2 = sess.run(
                [
                    embedding_pickled.distribution.loc,
                    embedding_pickled.distribution.stddev()
                ],
                feed_dict={embedding_pickled.model.input: [[obs.flatten()]]})
            assert np.array_equal(output1, output2)
Exemple #2
0
    def get_env_spec(cls, env_spec, latent_dim, module):
        """Get environment specs of encoder with latent dimension.

        Args:
            env_spec (garage.envs.EnvSpec): Environment specs.
            latent_dim (int): Latent dimension.
            module (str): Module to get environment specs for; must be
                'encoder' or 'vf'.

        Returns:
            garage.envs.InOutSpec or garage.envs.EnvSpec: Module environment
                specs with latent dimension (InOutSpec for the encoder,
                EnvSpec for the value function).

        Raises:
            ValueError: If ``module`` is neither 'encoder' nor 'vf'.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        if module == 'encoder':
            # Encoder sees obs + action + reward; outputs mean and std per
            # latent dimension.
            in_dim = obs_dim + action_dim + 1
            out_dim = latent_dim * 2
        elif module == 'vf':
            in_dim = obs_dim
            out_dim = latent_dim
        else:
            # Previously an unknown module fell through to a NameError on
            # in_dim; fail fast with a clear message instead.
            raise ValueError(
                'module must be "encoder" or "vf", got {!r}'.format(module))
        in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
        out_space = akro.Box(low=-1,
                             high=1,
                             shape=(out_dim, ),
                             dtype=np.float32)
        if module == 'encoder':
            spec = InOutSpec(in_space, out_space)
        else:
            spec = EnvSpec(in_space, out_space)

        return spec
Exemple #3
0
    def test_is_pickleable(self, obs_dim, embedding_dim):
        """Pickled encoder reproduces the original model outputs.

        The real GaussianMLPModel is patched with SimpleGaussianMLPModel so
        this exercises only (de)serialization, not the full network.
        """
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        with mock.patch(('garage.tf.embeddings.'
                         'gaussian_mlp_encoder.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                       output_space=env.spec.action_space)
            embedding = GaussianMLPEncoder(embedding_spec)

        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim  # NOTE(review): rebinds the parameter; value appears unused below

        # Perturb an internal variable so outputs differ from init values.
        with tf.compat.v1.variable_scope('GaussianMLPEncoder/GaussianMLPModel',
                                         reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            embedding.model.outputs[:-1],
            feed_dict={embedding.model.input: [obs.flatten()]})

        # Unpickle into a fresh graph/session and compare outputs.
        p = pickle.dumps(embedding)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            embedding_pickled = pickle.loads(p)
            output2 = sess.run(
                embedding_pickled.model.outputs[:-1],
                feed_dict={embedding_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)
Exemple #4
0
    def get_infer_spec(cls, env_spec, latent_dim, inference_window_size):
        """Build the in/out spec of the inference network.

        The inference network consumes a window of ``inference_window_size``
        flattened observations from the trajectory and produces a latent
        embedding.

        Args:
            env_spec (garage.envs.EnvSpec): Environment spec.
            latent_dim (int): Latent dimension.
            inference_window_size (int): Length of inference window.

        Returns:
            garage.InOutSpec: Inference spec.

        """
        latent_space = cls._get_latent_space(latent_dim)

        obs_space = env_spec.observation_space
        lower, upper = obs_space.bounds
        # Tile the flattened per-step bounds across the window to obtain
        # bounds for the whole trajectory slice.
        window_lb = np.stack([obs_space.flatten(lower)] *
                             inference_window_size)
        window_ub = np.stack([obs_space.flatten(upper)] *
                             inference_window_size)

        return InOutSpec(akro.Box(window_lb, window_ub), latent_space)
Exemple #5
0
    def get_env_spec(cls, env_spec, latent_dim, num_skills, module):
        """Get the environment spec for one of the algorithm's modules.

        Args:
            env_spec (garage.envs.EnvSpec): Environment spec.
            latent_dim (int): Latent dimension.
            num_skills (int): Number of skills.
            module (str): One of 'encoder', 'vf', 'controller_policy'
                or 'qf'.

        Returns:
            garage.envs.InOutSpec or garage.envs.EnvSpec: Spec with the
                input/output dimensions required by ``module`` (InOutSpec
                for the encoder, EnvSpec for all other modules).

        Raises:
            ValueError: If ``module`` is not a recognized module name.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        if module == 'encoder':
            # obs + action + one-hot skill + reward -> latent mean and std.
            in_dim = obs_dim + action_dim + num_skills + 1
            out_dim = latent_dim * 2
        elif module == 'vf':
            in_dim = obs_dim
            out_dim = latent_dim
        elif module in ('controller_policy', 'qf'):
            # Both take obs + latent and score/select among skills.
            in_dim = obs_dim + latent_dim
            out_dim = num_skills
        else:
            # Previously an unknown module fell through to a NameError on
            # in_dim; fail fast with a clear message instead.
            raise ValueError('unknown module {!r}'.format(module))

        in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
        out_space = akro.Box(low=-1,
                             high=1,
                             shape=(out_dim, ),
                             dtype=np.float32)

        # Only the encoder uses an InOutSpec; the rest take a plain EnvSpec.
        if module == 'encoder':
            return InOutSpec(in_space, out_space)
        return EnvSpec(in_space, out_space)
Exemple #6
0
    def test_get_vars(self):
        """Trainable and global variable sets agree and have the expected size."""
        obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec, hidden_sizes=[32, 32, 32])
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder,
                                                hidden_sizes=[32, 32, 32])

        # Sort both listings by name so they can be compared element-wise.
        vars1 = sorted(policy.get_trainable_vars(), key=lambda v: v.name)
        vars2 = sorted(policy.get_global_vars(), key=lambda v: v.name)

        assert vars1 == vars2
        # Two network. Each with 4 layers * (1 weight + 1 bias) + 1 log_std
        assert len(vars1) == 2 * (4 * 2 + 1)

        obs = np.random.random(obs_dim)
        latent = np.random.random((latent_dim, ))

        # With all-ones parameters the policy output should be nonzero.
        for var in vars1:
            var.assign(np.ones(var.shape))
        assert np.any(policy.get_action_given_latent(obs, latent) != 0)

        for var in vars1:
            var.assign(np.zeros(var.shape))
        # NOTE(review): with all-zero parameters the sampled action is still
        # generally nonzero (asserting "not all zero"); confirm this double
        # negative is the intended check rather than np.all(... == 0).
        assert not np.all(policy.get_action_given_latent(obs, latent) == 0)
Exemple #7
0
    def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
        """Every action-query API returns actions inside the action space."""
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        policy = GaussianMLPTaskEmbeddingPolicy(
            env_spec=env.spec, encoder=GaussianMLPEncoder(embedding_spec))

        env.reset()
        obs, _, _, _ = env.step(1)
        latent = np.random.random((latent_dim, ))
        task = np.zeros(task_num)
        task[0] = 1
        aug_obs = np.concatenate([obs.flatten(), task])

        # Single-sample queries through all three entry points.
        single_actions = [
            policy.get_action_given_latent(obs, latent)[0],
            policy.get_action_given_task(obs, task)[0],
            policy.get_action(aug_obs)[0],
        ]
        for action in single_actions:
            assert env.action_space.contains(action)

        # Batched queries through the corresponding plural entry points.
        batch = 3
        acts_latent, _ = policy.get_actions_given_latents([obs] * batch,
                                                          [latent] * batch)
        acts_task, _ = policy.get_actions_given_tasks([obs] * batch,
                                                      [task] * batch)
        acts_aug, _ = policy.get_actions([aug_obs] * batch)
        for action in chain(acts_latent, acts_task, acts_aug):
            assert env.action_space.contains(action)
Exemple #8
0
    def test_auxiliary(self):
        """Spec-derived properties of the policy match its construction."""
        obs_dim, action_dim = (2, ), (2, )
        task_num, latent_dim = 2, 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)
        obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 2))
        task_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 2))
        policy.build(obs_ph, task_ph)

        # The action distribution covers the full (flat) action space.
        loc_shape = policy.distribution.loc.get_shape().as_list()
        assert loc_shape[-1] == env.action_space.flat_dim
        assert policy.encoder == encoder
        assert policy.latent_space.flat_dim == latent_dim
        assert policy.task_space.flat_dim == task_num
        # Observations are augmented with the one-hot task vector.
        aug_dim = env.observation_space.flat_dim + task_num
        assert policy.augmented_observation_space.flat_dim == aug_dim
        enc_loc_shape = policy.encoder_distribution.loc.get_shape().as_list()
        assert enc_loc_shape[-1] == latent_dim
Exemple #9
0
    def test_dist_info(self, obs_dim, embedding_dim):
        """dist_info and dist_info_sym return the mocked model's constants.

        SimpleGaussianMLPModel is patched in, so the expected mean is 0.5
        and the expected log-std is log(0.5) for every output dimension.
        """
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        with mock.patch(('garage.tf.embeddings.'
                         'gaussian_mlp_encoder.GaussianMLPModel'),
                        new=SimpleGaussianMLPModel):
            embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                       output_space=env.spec.action_space)
            embedding = GaussianMLPEncoder(embedding_spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        obs_dim = env.spec.observation_space.flat_dim
        obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, obs_dim))

        dist1_sym = embedding.dist_info_sym(obs_ph, name='p1_sym')

        # flatten output
        expected_mean = [np.full(np.prod(embedding_dim), 0.5)]
        expected_log_std = [np.full(np.prod(embedding_dim), np.log(0.5))]

        prob0 = embedding.dist_info(obs.flatten())
        prob1 = self.sess.run(dist1_sym, feed_dict={obs_ph: [obs.flatten()]})

        # Eager and symbolic paths must both agree with the mocked constants.
        assert np.array_equal(prob0['mean'].flatten(), expected_mean[0])
        assert np.array_equal(prob0['log_std'].flatten(), expected_log_std[0])
        assert np.array_equal(prob1['mean'], expected_mean)
        assert np.array_equal(prob1['log_std'], expected_log_std)
 def test_clone(self):
     """A cloned encoder preserves input and output dimensions."""
     env = GarageEnv(DummyBoxEnv(obs_dim=(2, ), action_dim=(2, )))
     spec = InOutSpec(input_space=env.spec.observation_space,
                      output_space=env.spec.action_space)
     original = GaussianMLPEncoder(spec)
     cloned = original.clone(name='cloned')
     assert cloned.input_dim == original.input_dim
     assert cloned.output_dim == original.output_dim
    def __init__(self,
                 spec,
                 image_format,
                 *,
                 kernel_sizes,
                 hidden_channels,
                 strides,
                 hidden_sizes=(32, 32),
                 cnn_hidden_nonlinearity=nn.ReLU,
                 mlp_hidden_nonlinearity=nn.ReLU,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False):
        """Build a CNN feature extractor followed by an MLP head.

        Args:
            spec (garage.InOutSpec): Input (image) and output spaces of the
                whole module.
            image_format (str): Image layout, e.g. 'NCHW'.
            kernel_sizes (tuple[int]): Kernel size of each CNN layer.
            hidden_channels (tuple[int]): Output channels of each CNN layer.
            strides (tuple[int]): Stride of each CNN layer.
            hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
            cnn_hidden_nonlinearity (callable): Activation between CNN
                layers.
            mlp_hidden_nonlinearity (callable): Activation inserted between
                the CNN and the MLP; skipped entirely when None.
            hidden_w_init (callable): Initializer for hidden weights.
            hidden_b_init (callable): Initializer for hidden biases.
            paddings (int or tuple[int]): Padding of each CNN layer.
            padding_mode (str): Type of CNN padding, e.g. 'zeros'.
            max_pool (bool): Whether to pool after the CNN layers.
            pool_shape (tuple[int]): Pooling window shape.
            pool_stride (int): Pooling stride.
            output_nonlinearity (callable): Activation for the MLP output.
            output_w_init (callable): Initializer for output weights.
            output_b_init (callable): Initializer for output biases.
            layer_normalization (bool): Whether to use layer normalization.
        """
        super().__init__()

        # The CNN computes its own output space, so leave it unspecified.
        cnn = CNNModule(spec=InOutSpec(input_space=spec.input_space,
                                       output_space=None),
                        image_format=image_format,
                        kernel_sizes=kernel_sizes,
                        strides=strides,
                        hidden_w_init=hidden_w_init,
                        hidden_b_init=hidden_b_init,
                        hidden_channels=hidden_channels,
                        hidden_nonlinearity=cnn_hidden_nonlinearity,
                        paddings=paddings,
                        padding_mode=padding_mode,
                        max_pool=max_pool,
                        layer_normalization=layer_normalization,
                        pool_shape=pool_shape,
                        pool_stride=pool_stride)

        mlp = MLPModule(cnn.spec.output_space.flat_dim,
                        spec.output_space.flat_dim,
                        hidden_sizes,
                        hidden_nonlinearity=mlp_hidden_nonlinearity,
                        hidden_w_init=hidden_w_init,
                        hidden_b_init=hidden_b_init,
                        output_nonlinearity=output_nonlinearity,
                        output_w_init=output_w_init,
                        output_b_init=output_b_init,
                        layer_normalization=layer_normalization)

        # Optionally insert an activation between the CNN and the MLP.
        stages = [cnn]
        if mlp_hidden_nonlinearity is not None:
            stages.append(mlp_hidden_nonlinearity())
        stages += [nn.Flatten(), mlp]
        self._module = nn.Sequential(*stages)
Exemple #12
0
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):
    """DiscreteCNNModule equals a hand-wired CNN -> flatten -> MLP pipeline."""
    input_shape = (3, 32, 32)  # (channels, height, width)
    spec = InOutSpec(akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
                     akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    obs = torch.rand(input_shape)

    # Module under test, with deterministic all-ones initialization.
    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               paddings=paddings,
                               padding_mode='zeros',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # Reference pipeline assembled from the same building blocks.
    cnn_spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf), None)
    cnn = CNNModule(spec=cnn_spec,
                    image_format='NCHW',
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_)
    features = torch.flatten(cnn(obs), start_dim=1)
    mlp = MLPModule(
        features.shape[1],
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )
    expected = mlp(features)

    assert torch.all(torch.eq(expected.detach(), module(obs).detach()))
    def __init__(self,
                 env_spec,
                 image_format,
                 kernel_sizes,
                 *,
                 hidden_channels,
                 strides=1,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False,
                 name='CategoricalCNNPolicy'):
        """Initialize a CNN policy over a discrete action space.

        Args:
            env_spec (garage.envs.EnvSpec): Environment spec. The action
                space must be akro.Discrete and the observation space must
                not be akro.Dict.
            image_format (str): Image layout, e.g. 'NCHW'.
            kernel_sizes (tuple[int]): Kernel size of each CNN layer.
            hidden_channels (tuple[int]): Output channels of each CNN layer.
            strides (tuple[int] or int): Stride of each CNN layer.
            hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
            hidden_nonlinearity (callable): Activation for hidden layers.
            hidden_w_init (callable): Initializer for hidden weights.
            hidden_b_init (callable): Initializer for hidden biases.
            paddings (int or tuple[int]): Padding of each CNN layer.
            padding_mode (str): Type of CNN padding, e.g. 'zeros'.
            max_pool (bool): Whether to pool after the CNN layers.
            pool_shape (tuple[int]): Pooling window shape.
            pool_stride (int): Pooling stride.
            output_w_init (callable): Initializer for output weights.
            output_b_init (callable): Initializer for output biases.
            layer_normalization (bool): Whether to use layer normalization.
            name (str): Name of the policy.

        Raises:
            ValueError: If the action space is not akro.Discrete, or the
                observation space is akro.Dict.
        """
        if not isinstance(env_spec.action_space, akro.Discrete):
            # Bug fix: the message previously named CategoricalMLPPolicy.
            raise ValueError('CategoricalCNNPolicy only works '
                             'with akro.Discrete action space.')
        if isinstance(env_spec.observation_space, akro.Dict):
            # Bug fix: dropped the stray 'with' from the message.
            raise ValueError('CNN policies do not support '
                             'akro.Dict observation spaces.')

        super().__init__(env_spec, name)

        self._cnn_module = CNNModule(InOutSpec(
            self._env_spec.observation_space, None),
                                     image_format=image_format,
                                     kernel_sizes=kernel_sizes,
                                     strides=strides,
                                     hidden_channels=hidden_channels,
                                     hidden_w_init=hidden_w_init,
                                     hidden_b_init=hidden_b_init,
                                     hidden_nonlinearity=hidden_nonlinearity,
                                     paddings=paddings,
                                     padding_mode=padding_mode,
                                     max_pool=max_pool,
                                     pool_shape=pool_shape,
                                     pool_stride=pool_stride,
                                     layer_normalization=layer_normalization)
        self._mlp_module = MultiHeadedMLPModule(
            n_heads=1,
            input_dim=self._cnn_module.spec.output_space.flat_dim,
            output_dims=[self._env_spec.action_space.flat_dim],
            hidden_sizes=hidden_sizes,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            hidden_nonlinearity=hidden_nonlinearity,
            output_w_inits=output_w_init,
            output_b_inits=output_b_init)
Exemple #14
0
    def test_get_embedding(self, obs_dim, embedding_dim):
        """forward() maps an observation into the embedding output space."""
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        spec = InOutSpec(input_space=env.spec.observation_space,
                         output_space=env.spec.action_space)
        encoder = GaussianMLPEncoder(spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        latent, _ = encoder.forward(obs)
        assert env.action_space.contains(latent)
 def test_clone(self):
     """A clone copies the dimensions and the parameter values."""
     env = GymEnv(DummyBoxEnv(obs_dim=(2, ), action_dim=(2, )))
     spec = InOutSpec(input_space=env.spec.observation_space,
                      output_space=env.spec.action_space)
     original = GaussianMLPEncoder(spec)
     cloned = original.clone(name='cloned')
     assert cloned.input_dim == original.input_dim
     assert cloned.output_dim == original.output_dim
     # Parameter tensors must be copied value-for-value.
     for copy_param, param in zip(cloned.model.parameters.values(),
                                  original.model.parameters.values()):
         assert np.array_equal(copy_param, param)
    def test_auxiliary(self):
        """Exercise the encoder's auxiliary properties and param utilities."""
        input_space = akro.Box(np.array([-1, -1]), np.array([1, 1]))
        latent_space = akro.Box(np.array([-2, -2, -2]), np.array([2, 2, 2]))
        embedding_spec = InOutSpec(input_space=input_space,
                                   output_space=latent_space)
        embedding = GaussianMLPEncoder(embedding_spec,
                                       hidden_sizes=[32, 32, 32])
        task_input = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, None,
                                                     embedding.input_dim))
        embedding.build(task_input)
        # 9 Layers: (3 hidden + 1 output) * (1 weight + 1 bias) + 1 log_std
        assert len(embedding.get_params()) == 9
        assert len(embedding.get_global_vars()) == 9

        # Distribution and input/output tensor shapes follow the spec.
        assert embedding.distribution.loc.get_shape().as_list(
        )[-1] == latent_space.shape[0]
        assert embedding.input.shape.as_list() == [
            None, None, input_space.shape[0]
        ]
        assert (embedding.latent_mean.shape.as_list() == [
            None, None, latent_space.shape[0]
        ])
        assert (embedding.latent_std_param.shape.as_list() == [
            None, None, latent_space.shape[0]
        ])

        # To increase coverage in embeddings/base.py
        embedding.reset()
        assert embedding.input_dim == embedding_spec.input_space.flat_dim
        assert embedding.output_dim == embedding_spec.output_space.flat_dim

        # Expected variable shapes for a 2 -> 32 -> 32 -> 32 -> 3 network,
        # plus the trailing log_std parameter.
        var_shapes = [
            (2, 32),
            (32, ),  # input
            (32, 32),
            (32, ),  # hidden 0
            (32, 32),
            (32, ),  # hidden 1
            (32, 3),
            (3, ),  # hidden 2
            (3, )
        ]  # log_std
        assert sorted(embedding.get_param_shapes()) == sorted(var_shapes)

        # Round-trip all parameter values through set/get and flat_to_params.
        var_count = sum(list(map(np.prod, var_shapes)))
        embedding.set_param_values(np.ones(var_count))
        assert (embedding.get_param_values() == np.ones(var_count)).all()

        assert (sorted(
            map(np.shape, embedding.flat_to_params(
                np.ones(var_count)))) == sorted(var_shapes))
Exemple #17
0
    def get_encoder_spec(cls, task_space, latent_dim):
        """Get the embedding spec of the encoder.

        Args:
            task_space (akro.Space): Task spec.
            latent_dim (int): Latent dimension.

        Returns:
            garage.InOutSpec: Encoder spec.

        """
        return InOutSpec(task_space, cls._get_latent_space(latent_dim))
Exemple #18
0
def test_check_spec():
    """Malformed specs are rejected with clear errors or warnings."""

    def unbounded_box(shape):
        # Unbounded Box of the given shape, for brevity below.
        return akro.Box(shape=shape, low=-np.inf, high=np.inf)

    # Input space must be an akro.Box (or Image).
    with pytest.raises(ValueError, match='should be an akro.Box'):
        _check_spec(InOutSpec(akro.Dict(), None), 'NCHW')
    # Input must be exactly three-dimensional.
    with pytest.raises(ValueError, match='should have three dimensions'):
        _check_spec(InOutSpec(unbounded_box([1, 1, 1, 1]), None), 'NCHW')
    # Output, when present, must be one-dimensional.
    with pytest.raises(ValueError, match='akro.Box with a single dimension'):
        _check_spec(
            InOutSpec(unbounded_box([1, 1, 1]), unbounded_box([1, 1])),
            'NCHW')
    # An unusual channel count (4) should only warn, not fail.
    with pytest.warns(UserWarning):
        _check_spec(InOutSpec(unbounded_box([4, 1, 1]), None), 'NCHW')
Exemple #19
0
 def setup_method(self):
     """Create a shared input spec and an all-zero input minibatch.

     The observation space is an unbounded image-shaped Box and the input
     tensor is a zero batch of 64 images of size [3, 32, 32].
     """
     self.batch_size = 64
     self.input_width = 32
     self.input_height = 32
     self.in_channel = 3
     self.dtype = torch.float32
     # Bug fix: low was +np.inf, which makes the Box degenerate/empty.
     # An unbounded space spans (-inf, inf), matching the other specs in
     # this file.
     self.input_spec = InOutSpec(
         akro.Box(
             shape=[self.in_channel, self.input_height, self.input_width],
             high=np.inf,
             low=-np.inf), None)
     self.input = torch.zeros(
         (self.batch_size, self.in_channel, self.input_height,
          self.input_width),
         dtype=self.dtype)  # minibatch size 64, image size [3, 32, 32]
    def test_get_embedding(self, obs_dim, embedding_dim):
        """A built encoder maps an observation into the output space."""
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        spec = InOutSpec(input_space=env.spec.observation_space,
                         output_space=env.spec.action_space)
        encoder = GaussianMLPEncoder(spec)
        task_input = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, encoder.input_dim))
        encoder.build(task_input)

        env.reset()
        obs, _, _, _ = env.step(1)

        latent, _ = encoder.forward(obs)
        assert env.action_space.contains(latent)
Exemple #21
0
def test_set_output_size(kernel_sizes, hidden_channels, strides, pool_shape,
                         pool_stride):
    """Providing an output space forces the CNN output to that size."""
    in_space = akro.Box(shape=[3, 19, 15], high=np.inf, low=-np.inf)
    out_space = akro.Box(shape=[200], high=np.inf, low=-np.inf)
    model = CNNModule(InOutSpec(in_space, out_space),
                      image_format='NCHW',
                      hidden_channels=hidden_channels,
                      kernel_sizes=kernel_sizes,
                      strides=strides,
                      pool_shape=[(pool_shape, pool_shape)],
                      pool_stride=[(pool_stride, pool_stride)],
                      layer_normalization=True)
    batch = torch.ones(10, 3, 19, 15)
    assert model(batch).shape == (10, 200)
Exemple #22
0
    def __init__(self,
                 env_spec,
                 image_format,
                 *,
                 kernel_sizes,
                 hidden_channels,
                 strides,
                 hidden_sizes=(32, 32),
                 cnn_hidden_nonlinearity=torch.nn.ReLU,
                 mlp_hidden_nonlinearity=torch.nn.ReLU,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False):
        """Store the env spec and build the underlying DiscreteCNNModule.

        The module maps the environment's observation space to its action
        space; all remaining arguments configure the CNN and MLP stages of
        that module.
        """
        super().__init__()

        self._env_spec = env_spec

        # Map observations to the (discrete) action space.
        module_spec = InOutSpec(input_space=env_spec.observation_space,
                                output_space=env_spec.action_space)
        self._cnn_module = DiscreteCNNModule(
            spec=module_spec,
            image_format=image_format,
            kernel_sizes=kernel_sizes,
            hidden_channels=hidden_channels,
            strides=strides,
            hidden_sizes=hidden_sizes,
            cnn_hidden_nonlinearity=cnn_hidden_nonlinearity,
            mlp_hidden_nonlinearity=mlp_hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            paddings=paddings,
            padding_mode=padding_mode,
            max_pool=max_pool,
            pool_shape=pool_shape,
            pool_stride=pool_stride,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            layer_normalization=layer_normalization)
Exemple #23
0
    def test_pickling(self):
        """A pickled policy round-trips with its distribution helpers intact."""
        obs_dim, action_dim = (2, ), (2, )
        task_num, latent_dim = 5, 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        spec = InOutSpec(input_space=akro.Box(low=np.zeros(task_num),
                                              high=np.ones(task_num)),
                         output_space=akro.Box(low=np.zeros(latent_dim),
                                               high=np.ones(latent_dim)))
        policy = GaussianMLPTaskEmbeddingPolicy(
            env_spec=env.spec, encoder=GaussianMLPEncoder(spec))

        data = pickle.dumps(policy)
        # Load under a fresh scope to avoid variable-name collisions.
        with tf.compat.v1.variable_scope('resumed'):
            restored = pickle.loads(data)
            assert hasattr(restored, '_f_dist_obs_latent')
            assert hasattr(restored, '_f_dist_obs_task')
    def test_get_embedding(self, obs_dim, embedding_dim):
        """get_latent/get_latents produce points in the output space."""
        env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
        spec = InOutSpec(input_space=env.spec.observation_space,
                         output_space=env.spec.action_space)
        encoder = GaussianMLPEncoder(spec)
        task_input = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, encoder.input_dim))
        encoder.build(task_input, name='task_input')

        env.reset()
        obs = env.step(env.action_space.sample()).observation

        # Single and batched queries must both land inside the space.
        single, _ = encoder.get_latent(obs)
        batch, _ = encoder.get_latents([obs] * 5)
        assert env.action_space.contains(single)
        for latent in batch:
            assert env.action_space.contains(latent)
Exemple #25
0
    def test_get_latent(self):
        """get_latent returns a latent and matching distribution info."""
        obs_dim, action_dim = (2, ), (2, )
        task_num, latent_dim = 5, 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        policy = GaussianMLPTaskEmbeddingPolicy(
            env_spec=env.spec, encoder=GaussianMLPEncoder(embedding_spec))

        # One-hot encode task id 3 and embed it.
        onehot = np.eye(task_num)[3]
        latent, latent_info = policy.get_latent(onehot)
        assert latent.shape == (latent_dim, )
        assert latent_info['mean'].shape == (latent_dim, )
        assert latent_info['log_std'].shape == (latent_dim, )
Exemple #26
0
def test_without_nonlinearity(output_dim, hidden_channels, kernel_sizes,
                              strides):
    """Disabling both nonlinearities leaves a three-layer module."""
    in_channel, input_height, input_width = 3, 32, 32
    image_shape = (in_channel, input_height, input_width)
    unbounded = akro.Box(shape=image_shape, low=-np.inf, high=np.inf)
    output_space = akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf)
    spec = InOutSpec(unbounded, output_space)

    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               mlp_hidden_nonlinearity=None,
                               cnn_hidden_nonlinearity=None,
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # Without activation layers only CNN, flatten, and MLP submodules remain.
    assert len(module._module) == 3
Exemple #27
0
    def test_encoder_dist_info(self):
        """Encoder dist_info_sym returns the stub model's mean/log_std.

        Swaps GaussianMLPModel for SimpleGaussianMLPModel inside the
        encoder module, then checks that the symbolic dist-info graph
        evaluates to the stub's fixed mean (0.5) and log_std (log 0.5).
        """
        obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(
                'garage.tf.embeddings.'
                'gaussian_mlp_encoder.GaussianMLPModel',
                new=SimpleGaussianMLPModel):

            # NOTE(review): _build is replaced at class level and never
            # restored, so the float32 cast leaks into later tests that
            # use SimpleGaussianMLPModel — presumably intentional here,
            # but worth confirming.
            old_build = SimpleGaussianMLPModel._build

            def float32_build(this, obs_input, name):
                # Wrap the stub's _build to force log_std to float32 so
                # sess.run below yields consistent dtypes.
                mean, log_std, std, dist = old_build(this, obs_input, name)
                return mean, tf.cast(log_std, tf.float32), std, dist

            SimpleGaussianMLPModel._build = float32_build

            embedding_spec = InOutSpec(
                input_space=akro.Box(low=np.zeros(task_num),
                                     high=np.ones(task_num)),
                output_space=akro.Box(low=np.zeros(latent_dim),
                                      high=np.ones(latent_dim)))
            encoder = GaussianMLPEncoder(embedding_spec)
            policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                    encoder=encoder)

            assert policy.encoder_distribution.dim == latent_dim

            # Feed a single task vector through the symbolic dist-info op.
            inp_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
            dist_sym = policy.encoder_dist_info_sym(inp_ph)
            dist = self.sess.run(dist_sym,
                                 feed_dict={inp_ph: [np.random.random(5)]})

            # The stub model always outputs mean 0.5 and std 0.5.
            expected_mean = np.full(latent_dim, 0.5)
            expected_log_std = np.full(latent_dim, np.log(0.5))

            assert np.allclose(dist['mean'], expected_mean)
            assert np.allclose(dist['log_std'], expected_log_std)

            # NOTE(review): resets the class-level dtype for subsequent
            # tests; _build is NOT restored the same way — confirm this
            # asymmetry is deliberate.
            SimpleGaussianMLPModel._dtype = np.float32
Exemple #28
0
def test_is_pickleable(output_dim, hidden_channels, kernel_sizes, strides):
    """A pickled-then-unpickled module produces identical outputs.

    Args:
        output_dim (int): Output head dimension.
        hidden_channels (tuple[int]): CNN hidden channel counts.
        kernel_sizes (tuple[int]): Convolution kernel sizes.
        strides (tuple[int]): Convolution strides.
    """
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    input_a = torch.ones(input_shape)
    spec = InOutSpec(akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
                     akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))

    model = DiscreteCNNModule(spec=spec,
                              image_format='NCHW',
                              hidden_channels=hidden_channels,
                              kernel_sizes=kernel_sizes,
                              mlp_hidden_nonlinearity=nn.ReLU,
                              cnn_hidden_nonlinearity=nn.ReLU,
                              strides=strides)
    output1 = model(input_a)

    h = pickle.dumps(model)
    model_pickled = pickle.loads(h)
    output2 = model_pickled(input_a)

    # torch.equal is the direct exact-equality check for two tensors;
    # the original np.array_equal(torch.all(torch.eq(...)), True) was a
    # roundabout way of asserting the same thing.
    assert torch.equal(output1, output2)
Exemple #29
0
    def test_auxiliary(self):
        """Policy's auxiliary spaces and distributions are sized correctly."""
        obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 2, 2
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(
                'garage.tf.policies.'
                'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
                new=SimpleGaussianMLPModel):
            spec = InOutSpec(
                input_space=akro.Box(low=np.zeros(task_num),
                                     high=np.ones(task_num)),
                output_space=akro.Box(low=np.zeros(latent_dim),
                                      high=np.ones(latent_dim)))
            encoder = GaussianMLPEncoder(spec)
            policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                    encoder=encoder)

        # Action distribution matches the env, encoder/latent sizes match
        # the spec, and the augmented observation is obs + task one-hot.
        assert policy.distribution.dim == env.action_space.flat_dim
        assert policy.encoder == encoder
        assert policy.latent_space.flat_dim == latent_dim
        assert policy.task_space.flat_dim == task_num
        expected_aug = env.observation_space.flat_dim + task_num
        assert policy.augmented_observation_space.flat_dim == expected_aug
        assert policy.encoder_distribution.dim == latent_dim
Exemple #30
0
    def test_get_latent(self):
        """get_latent returns latent and info shaped (latent_dim,)."""
        obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(
                'garage.tf.policies.'
                'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
                new=SimpleGaussianMLPModel):
            spec = InOutSpec(
                input_space=akro.Box(low=np.zeros(task_num),
                                     high=np.ones(task_num)),
                output_space=akro.Box(low=np.zeros(latent_dim),
                                      high=np.ones(latent_dim)))
            policy = GaussianMLPTaskEmbeddingPolicy(
                env_spec=env.spec, encoder=GaussianMLPEncoder(spec))

            # Encode the one-hot for task index 3.
            one_hot = np.zeros(task_num)
            one_hot[3] = 1
            sampled_latent, info = policy.get_latent(one_hot)
            expected_shape = (latent_dim, )
            assert sampled_latent.shape == expected_shape
            assert info['mean'].shape == expected_shape
            assert info['log_std'].shape == expected_shape