def test_obs_is_image(self):
        image_env = TfEnv(DummyDiscretePixelEnv(), is_image=True)
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.CNNModel._build'),
                        autospec=True,
                        side_effect=CNNModel._build) as build:

            qf = DiscreteCNNQFunction(env_spec=image_env.spec,
                                      filter_dims=(3, ),
                                      num_filters=(5, ),
                                      strides=(2, ),
                                      dueling=False)
            normalized_obs = build.call_args_list[0][0][1]

            input_ph = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'obs:0')

            fake_obs = [np.full(image_env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={input_ph: fake_obs}) == 1.).all()

            obs_dim = image_env.spec.observation_space.shape
            state_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(None, ) + obs_dim)

            qf.get_qval_sym(state_input, name='another')
            normalized_obs = build.call_args_list[1][0][1]

            input_ph = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'Placeholder:0')

            fake_obs = [np.full(image_env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={input_ph: fake_obs}) == 1.).all()
    def test_obs_not_image(self):
        env = GarageEnv(DummyDiscretePixelEnv(), is_image=False)
        with mock.patch(('garage.tf.baselines.'
                         'gaussian_cnn_baseline.'
                         'GaussianCNNRegressor'),
                        new=SimpleGaussianCNNRegressor):
            with mock.patch(
                    'garage.tf.baselines.'
                    'gaussian_cnn_baseline.'
                    'normalize_pixel_batch',
                    side_effect=normalize_pixel_batch) as npb:

                gcb = GaussianCNNBaseline(env_spec=env.spec)

                obs_dim = env.spec.observation_space.shape
                paths = [{
                    'observations': [np.full(obs_dim, 1)],
                    'returns': [1]
                }, {
                    'observations': [np.full(obs_dim, 2)],
                    'returns': [2]
                }]

                gcb.fit(paths)
                obs = {
                    'observations': [np.full(obs_dim, 1),
                                     np.full(obs_dim, 2)]
                }
                gcb.predict(obs)
                assert not npb.called
Example #3
0
    def test_obs_is_image(self):
        image_env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
        with mock.patch(('garage.tf.models.'
                         'categorical_cnn_model.CNNModel._build'),
                        autospec=True,
                        side_effect=CNNModel._build) as build:

            qf = DiscreteCNNQFunction(env_spec=image_env.spec,
                                      filters=((5, (3, 3)), ),
                                      strides=(2, ),
                                      dueling=False)
            normalized_obs = build.call_args_list[0][0][1]

            input_ph = qf.input
            assert input_ph != normalized_obs

            fake_obs = [np.full(image_env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={input_ph: fake_obs}) == 1.).all()

            obs_dim = image_env.spec.observation_space.shape
            state_input = tf.compat.v1.placeholder(tf.uint8,
                                                   shape=(None, ) + obs_dim)

            qf.get_qval_sym(state_input, name='another')
            normalized_obs = build.call_args_list[1][0][1]

            fake_obs = [np.full(image_env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={state_input:
                                             fake_obs}) == 1.).all()
    def test_is_pickleable(self):
        env = GymEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=((3, (32, 32)), ),
                                      strides=(1, ),
                                      padding='SAME',
                                      hidden_sizes=(4, ))

        env.reset()
        obs = env.step(1).observation

        with tf.compat.v1.variable_scope('CategoricalCNNPolicy', reuse=True):
            cnn_bias = tf.compat.v1.get_variable('CNNModel/cnn/h0/bias')
            bias = tf.compat.v1.get_variable('MLPModel/mlp/hidden_0/bias')

        cnn_bias.load(tf.ones_like(cnn_bias).eval())
        bias.load(tf.ones_like(bias).eval())

        state_input = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, None) +
                                               policy.input_dim)
        dist_sym = policy.build(state_input, name='dist_sym').dist
        output1 = self.sess.run(dist_sym.probs,
                                feed_dict={state_input: [[obs]]})
        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            state_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(None, None) +
                                                   policy.input_dim)
            dist_sym = policy_pickled.build(state_input, name='dist_sym').dist
            output2 = sess.run(dist_sym.probs,
                               feed_dict={state_input: [[obs]]})
            assert np.array_equal(output1, output2)
    def test_is_pickleable(self, kernel_sizes, hidden_channels, strides,
                           paddings):
        """Test if policy is pickable."""
        env = GymEnv(DummyDiscretePixelEnv())
        policy = DiscreteCNNPolicy(env_spec=env.spec,
                                   image_format='NHWC',
                                   hidden_channels=hidden_channels,
                                   kernel_sizes=kernel_sizes,
                                   strides=strides,
                                   paddings=paddings,
                                   padding_mode='zeros',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)
        env.reset()
        obs = env.step(1).observation

        output_action_1, _ = policy.get_action(obs.flatten())

        p = cloudpickle.dumps(policy)
        policy_pickled = cloudpickle.loads(p)
        output_action_2, _ = policy_pickled.get_action(obs)

        assert env.action_space.contains(int(output_action_1[0]))
        assert env.action_space.contains(int(output_action_2[0]))
        assert output_action_1.shape == output_action_2.shape
    def test_is_pickleable(self):
        env = GarageEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=((3, (32, 32)), ),
                                      strides=(1, ),
                                      padding='SAME',
                                      hidden_sizes=(4, ))

        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.compat.v1.variable_scope(
                'CategoricalCNNPolicy/CategoricalCNNModel', reuse=True):
            cnn_bias = tf.compat.v1.get_variable('CNNModel/cnn/h0/bias')
            bias = tf.compat.v1.get_variable('MLPModel/mlp/hidden_0/bias')

        cnn_bias.load(tf.ones_like(cnn_bias).eval())
        bias.load(tf.ones_like(bias).eval())

        output1 = self.sess.run(policy.distribution.probs,
                                feed_dict={policy.model.input: [[obs]]})
        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(policy_pickled.distribution.probs,
                               feed_dict={policy_pickled.model.input: [[obs]]})
            assert np.array_equal(output1, output2)
Example #7
0
    def test_get_action(self, mock_rand, filter_dims, num_filters, strides,
                        padding, hidden_sizes):
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy2(env_spec=env.spec,
                                       filter_dims=filter_dims,
                                       num_filters=num_filters,
                                       strides=strides,
                                       padding=padding,
                                       hidden_sizes=hidden_sizes)
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, ) +
                                           env.observation_space.shape,
                                           name='obs')
        policy.build(obs_var)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #8
0
    def test_obs_is_image(self):
        env = TfEnv(DummyDiscretePixelEnv(), is_image=True)
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.CNNModel._build'),
                        autospec=True,
                        side_effect=CNNModel._build) as build:
            policy = CategoricalCNNPolicy(env_spec=env.spec,
                                          conv_filters=(32, ),
                                          conv_filter_sizes=(1, ),
                                          conv_strides=(1, ),
                                          conv_pad='VALID',
                                          hidden_sizes=(3, ))
            normalized_obs = build.call_args_list[0][0][1]

            input_ph = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'Placeholder:0')

            fake_obs = [np.full(env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={input_ph: fake_obs}) == 1.).all()

            obs_dim = env.spec.observation_space.shape
            state_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(None, ) + obs_dim)

            policy.dist_info_sym(state_input, name='another')
            normalized_obs = build.call_args_list[1][0][1]

            input_ph = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'Placeholder_1:0')

            fake_obs = [np.full(env.spec.observation_space.shape, 255)]
            assert (self.sess.run(normalized_obs,
                                  feed_dict={state_input:
                                             fake_obs}) == 1.).all()
Example #9
0
    def test_get_qval_max_pooling(self, filters, strides, pool_strides,
                                  pool_shapes):
        env = GarageEnv(DummyDiscretePixelEnv())
        obs = env.reset()

        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModelWithMaxPooling'),
                        new=SimpleCNNModelWithMaxPooling):
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filters=filters,
                                            strides=strides,
                                            max_pooling=True,
                                            pool_strides=pool_strides,
                                            pool_shapes=pool_shapes)

        action_dim = env.action_space.shape

        obs, _, _, _ = env.step(1)

        act = np.full(action_dim, 0.5)
        expected_output = np.full((1, ), 0.5)

        outputs = qf.get_qval([obs], [act])

        assert np.array_equal(outputs[0], expected_output)

        outputs = qf.get_qval([obs, obs, obs], [act, act, act])

        for output in outputs:
            assert np.array_equal(output, expected_output)
Example #10
0
def test_transpose_image():
    """Test TransposeImage."""
    original_env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    obs_shape = original_env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        transposed_env = TransposeImage(original_env)
        assert (original_env.observation_space.shape[2] ==
                transposed_env.observation_space.shape[0])
Example #11
0
    def test_noop(self):
        env = Noop(DummyDiscretePixelEnv(), noop_max=3)

        for _ in range(1000):
            env.reset()
            assert 1 <= env.env.step_called <= 3

        env = Noop(DummyDiscretePixelEnv(), noop_max=10)
        for _ in range(1000):
            obs = env.reset()
            if env.env.step_called % 5 == 0:
                # There are only 5 lives in the environment, so if number of
                # steps are multiple of 5, env will call reset at last.
                assert np.array_equal(obs,
                                      np.ones(env.observation_space.shape))
            else:
                assert not np.array_equal(obs,
                                          np.ones(env.observation_space.shape))
    def test_clone(self, filters, strides, padding, hidden_sizes):
        env = GarageEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=filters,
                                      strides=strides,
                                      padding=padding,
                                      hidden_sizes=hidden_sizes)

        policy_clone = policy.clone('CategoricalCNNPolicyClone')
        assert policy.env_spec == policy_clone.env_spec
    def test_clone(self, filters, strides, padding, hidden_sizes):
        env = GymEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=filters,
                                      strides=strides,
                                      padding=padding,
                                      hidden_sizes=hidden_sizes)

        policy_clone = policy.clone('CategoricalCNNPolicyClone')
        assert policy.env_spec == policy_clone.env_spec
        for cloned_param, param in zip(policy_clone.parameters.values(),
                                       policy.parameters.values()):
            assert np.array_equal(cloned_param, param)
Example #14
0
    def test_fire_reset(self):
        env = DummyDiscretePixelEnv()
        env_wrap = FireReset(env)
        obs = env.reset()
        obs_wrap = env_wrap.reset()

        assert np.array_equal(obs, np.ones(env.observation_space.shape))
        assert np.array_equal(obs_wrap, np.full(env.observation_space.shape,
                                                2))

        env_wrap.step(2)
        obs_wrap = env_wrap.reset()  # env will call reset again, after fire
        assert np.array_equal(obs_wrap, np.ones(env.observation_space.shape))
    def test_obs_not_image(self):
        env = TfEnv(DummyDiscretePixelEnv())

        with mock.patch(('tests.fixtures.models.SimpleCNNModel._build'),
                        autospec=True,
                        side_effect=SimpleCNNModel._build) as build:
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.CNNModel'),
                            new=SimpleCNNModel):
                with mock.patch(('garage.tf.models.'
                                 'cnn_mlp_merge_model.MLPMergeModel'),
                                new=SimpleMLPMergeModel):

                    qf = ContinuousCNNQFunction(env_spec=env.spec,
                                                filter_dims=(3, ),
                                                num_filters=(5, ),
                                                strides=(1, ))

                    # ensure non-image obses are not normalized
                    # in _initialize() and get_qval()

                    normalized_obs = build.call_args_list[0][0][1]
                    assert normalized_obs == qf.inputs[0]

                    fake_obs = [
                        np.full(env.spec.observation_space.shape, 255.)
                    ]

                    assert (self.sess.run(normalized_obs,
                                          feed_dict={qf.inputs[0]:
                                                     fake_obs}) == 255.).all()

                    # ensure non-image obses are not normalized
                    # in get_qval_sym()

                    obs_dim = env.spec.observation_space.shape
                    state_input = tf.compat.v1.placeholder(tf.float32,
                                                           shape=(None, ) +
                                                           obs_dim)

                    act_dim = env.spec.observation_space.shape
                    action_input = tf.compat.v1.placeholder(tf.float32,
                                                            shape=(None, ) +
                                                            act_dim)

                    qf.get_qval_sym(state_input, action_input, name='another')
                    normalized_obs = build.call_args_list[1][0][1]

                    assert (self.sess.run(normalized_obs,
                                          feed_dict={state_input:
                                                     fake_obs}) == 255.).all()
 def test_get_action(self, hidden_channels, kernel_sizes, strides,
                     hidden_sizes):
     """Test get_action function."""
     env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
     policy = CategoricalCNNPolicy(env_spec=env.spec,
                                   image_format='NHWC',
                                   kernel_sizes=kernel_sizes,
                                   hidden_channels=hidden_channels,
                                   strides=strides,
                                   hidden_sizes=hidden_sizes)
     env.reset()
     obs = env.step(1).observation
     action, _ = policy.get_action(obs)
     assert env.action_space.contains(action)
Example #17
0
 def test_get_action(self, hidden_channels, kernel_sizes, strides,
                     hidden_sizes):
     """Test get_action function."""
     env = DummyDiscretePixelEnv()
     env = self._initialize_obs_env(env)
     policy = CategoricalCNNPolicy(env=env,
                                   kernel_sizes=kernel_sizes,
                                   hidden_channels=hidden_channels,
                                   strides=strides,
                                   hidden_sizes=hidden_sizes)
     env.reset()
     obs, _, _, _ = env.step(1)
     action, _ = policy.get_action(obs)
     assert env.action_space.contains(action)
 def test_flattened_image_input(self):
     env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
     gcb = GaussianCNNBaseline(env_spec=env.spec,
                               filters=((3, (3, 3)), (6, (3, 3))),
                               strides=(1, 1),
                               padding='SAME',
                               hidden_sizes=(32, ))
     env.reset()
     es = env.step(1)
     obs, rewards = es.observation, es.reward
     train_paths = [{'observations': [obs.flatten()], 'returns': [rewards]}]
     gcb.fit(train_paths)
     paths = {'observations': [obs.flatten()]}
     prediction = gcb.predict(paths)
     assert np.allclose(0., prediction)
Example #19
0
    def test_get_action_img_obs(self, hidden_channels, kernel_sizes, strides,
                                hidden_sizes):
        """Test get_action function with akro.Image observation space."""
        env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
        env = self._initialize_obs_env(env)
        policy = CategoricalCNNPolicy(env=env,
                                      kernel_sizes=kernel_sizes,
                                      hidden_channels=hidden_channels,
                                      strides=strides,
                                      hidden_sizes=hidden_sizes)
        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)
 def test_obs_unflattened(self, hidden_channels, kernel_sizes, strides,
                          hidden_sizes):
     """Test if a flattened image obs is passed to get_action
        then it is unflattened.
     """
     env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
     env.reset()
     policy = CategoricalCNNPolicy(env_spec=env.spec,
                                   image_format='NHWC',
                                   kernel_sizes=kernel_sizes,
                                   hidden_channels=hidden_channels,
                                   strides=strides,
                                   hidden_sizes=hidden_sizes)
     obs = env.observation_space.sample()
     action, _ = policy.get_action(env.observation_space.flatten(obs))
     env.step(action)
Example #21
0
    def test_obs_is_image(self):
        image_env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
        with mock.patch(('tests.fixtures.models.SimpleCNNModel._build'),
                        autospec=True,
                        side_effect=SimpleCNNModel._build) as build:
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.CNNModel'),
                            new=SimpleCNNModel):
                with mock.patch(('garage.tf.models.'
                                 'cnn_mlp_merge_model.MLPMergeModel'),
                                new=SimpleMLPMergeModel):

                    qf = ContinuousCNNQFunction(env_spec=image_env.spec,
                                                filters=((5, (3, 3)), ),
                                                strides=(1, ))

                    fake_obs = [
                        np.full(image_env.spec.observation_space.shape, 255)
                    ]

                    # make sure image obses are normalized in _initialize()
                    # and get_qval
                    normalized_obs = build.call_args_list[0][0][1]
                    assert normalized_obs != qf.inputs[0]

                    assert (self.sess.run(normalized_obs,
                                          feed_dict={qf.inputs[0]:
                                                     fake_obs}) == 1.).all()

                    # make sure image obses are normalized in get_qval_sim()

                    obs_dim = image_env.spec.observation_space.shape
                    state_input = tf.compat.v1.placeholder(tf.float32,
                                                           shape=(None, ) +
                                                           obs_dim)

                    act_dim = image_env.spec.observation_space.shape
                    action_input = tf.compat.v1.placeholder(tf.float32,
                                                            shape=(None, ) +
                                                            act_dim)

                    qf.build(state_input, action_input, name='another')
                    normalized_obs = build.call_args_list[1][0][1]

                    assert (self.sess.run(normalized_obs,
                                          feed_dict={state_input:
                                                     fake_obs}) == 1.).all()
    def test_get_action(self, filters, strides, padding, hidden_sizes):
        env = GymEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=filters,
                                      strides=strides,
                                      padding=padding,
                                      hidden_sizes=hidden_sizes)

        env.reset()
        obs = env.step(1).observation

        action, _ = policy.get_action(obs)
        assert env.action_space.contains(action)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
    def test_get_action(self, kernel_sizes, hidden_channels, strides,
                        paddings):
        """Test get_action function."""
        env = GymEnv(DummyDiscretePixelEnv())
        policy = DiscreteCNNPolicy(env_spec=env.spec,
                                   image_format='NHWC',
                                   hidden_channels=hidden_channels,
                                   kernel_sizes=kernel_sizes,
                                   strides=strides,
                                   paddings=paddings,
                                   padding_mode='zeros',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)
        env.reset()
        obs = env.step(1).observation

        action, _ = policy.get_action(obs.flatten())
        assert env.action_space.contains(int(action[0]))
Example #24
0
    def test_clone(self, filters, strides):
        env = MetaRLEnv(DummyDiscretePixelEnv())

        with mock.patch(('metarl.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('metarl.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filters=filters,
                                            strides=strides)

                qf_clone = qf.clone('another_qf')

        # pylint: disable=protected-access
        assert qf_clone._filters == qf._filters
        assert qf_clone._strides == qf._strides
    def test_build(self, filters, strides, padding, hidden_sizes):
        env = GarageEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=filters,
                                      strides=strides,
                                      padding=padding,
                                      hidden_sizes=hidden_sizes)

        obs = env.reset()

        state_input = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, None) +
                                               policy.input_dim)
        dist_sym = policy.build(state_input, name='dist_sym').dist
        output1 = self.sess.run([policy.distribution.probs],
                                feed_dict={policy.model.input: [[obs]]})
        output2 = self.sess.run([dist_sym.probs],
                                feed_dict={state_input: [[obs]]})
        assert np.array_equal(output1, output2)
    def test_get_qval(self, filter_dims, num_filters, strides):
        env = TfEnv(DummyDiscretePixelEnv())
        obs = env.reset()

        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filter_dims=filter_dims,
                                            num_filters=num_filters,
                                            strides=strides)

        action_dim = env.action_space.shape

        obs, _, _, _ = env.step(1)

        act = np.full(action_dim, 0.5)
        expected_output = np.full((1, ), 0.5)

        outputs = qf.get_qval([obs], [act])

        assert np.array_equal(outputs[0], expected_output)

        outputs = qf.get_qval([obs, obs, obs], [act, act, act])

        for output in outputs:
            assert np.array_equal(output, expected_output)

        # make sure observations are unflattened

        obs = env.observation_space.flatten(obs)
        qf._f_qval = mock.MagicMock()

        qf.get_qval([obs], [act])
        unflattened_obs = qf._f_qval.call_args_list[0][0][0]
        assert unflattened_obs.shape[1:] == env.spec.observation_space.shape

        qf.get_qval([obs, obs], [act, act])
        unflattened_obs = qf._f_qval.call_args_list[1][0][0]
        assert unflattened_obs.shape[1:] == env.spec.observation_space.shape
    def test_get_actions(self, hidden_channels, kernel_sizes, strides,
                         hidden_sizes):
        """Test get_actions function with akro.Image observation space."""
        env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      image_format='NHWC',
                                      kernel_sizes=kernel_sizes,
                                      hidden_channels=hidden_channels,
                                      strides=strides,
                                      hidden_sizes=hidden_sizes)
        env.reset()
        obs = env.step(1).observation

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
        torch_obs = torch.Tensor(obs)
        actions, _ = policy.get_actions([torch_obs, torch_obs, torch_obs])
        for action in actions:
            assert env.action_space.contains(action)
    def test_obs_unflattened(self, kernel_sizes, hidden_channels, strides,
                             paddings):
        """Test if a flattened image obs is passed to get_action
           then it is unflattened.
        """
        env = GymEnv(DummyDiscretePixelEnv())
        env.reset()
        policy = DiscreteCNNPolicy(env_spec=env.spec,
                                   image_format='NHWC',
                                   hidden_channels=hidden_channels,
                                   kernel_sizes=kernel_sizes,
                                   strides=strides,
                                   paddings=paddings,
                                   padding_mode='zeros',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)

        obs = env.observation_space.sample()
        action, _ = policy.get_action(env.observation_space.flatten(obs))
        env.step(action[0])
Example #29
0
    def test_get_actions(self, hidden_channels, kernel_sizes, strides,
                         hidden_sizes):
        """Test get_actions function with akro.Image observation space."""
        env = DummyDiscretePixelEnv()
        env = self._initialize_obs_env(env)
        policy = CategoricalCNNPolicy(env=env,
                                      kernel_sizes=kernel_sizes,
                                      hidden_channels=hidden_channels,
                                      strides=strides,
                                      hidden_sizes=hidden_sizes)
        env.reset()
        obs, _, _, _ = env.step(1)

        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
        torch_obs = torch.Tensor(obs)
        actions, _ = policy.get_actions([torch_obs, torch_obs, torch_obs])
        for action in actions:
            assert env.action_space.contains(action)
Example #30
0
    def test_is_pickleable(self):
        env = TfEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy2(env_spec=env.spec,
                                       filter_dims=(32, ),
                                       num_filters=(3, ),
                                       strides=(1, ),
                                       padding='SAME',
                                       hidden_sizes=(4, ))
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, ) +
                                           env.observation_space.shape,
                                           name='obs')
        policy.build(obs_var)

        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.compat.v1.variable_scope(
                'CategoricalCNNPolicy/CategoricalCNNModel/Sequential',
                reuse=True):
            cnn_bias = tf.compat.v1.get_variable('CNNModel/cnn/h0/bias')
            bias = tf.compat.v1.get_variable('MLPModel/mlp/hidden_0/bias')

        cnn_bias.load(tf.ones_like(cnn_bias).eval())
        bias.load(tf.ones_like(bias).eval())

        output1 = self.sess.run(policy.distribution.logits,
                                feed_dict={policy.model.input: [obs]})
        p = pickle.dumps(policy)

        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            obs_var = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, ) +
                                               env.observation_space.shape,
                                               name='obs')
            policy_pickled.build(obs_var)
            output2 = sess.run(policy_pickled.distribution.logits,
                               feed_dict={policy_pickled.model.input: [obs]})
            assert np.array_equal(output1, output2)