Exemple #1
0
    def test_packed_bits(self, stacked):
        """Check that bit-packed SMM observations unpack to the raw frames.

        Steps the 11-vs-11 SMM environment ten times with random actions.
        After every step the observation is packed with
        `PackedBitsObservation.observation` and unpacked again, both directly
        and after going through `utils.tpu_encode`, and both results are
        compared against the original observation cast to float32.

        Args:
            stacked: Forwarded to `gym.make` as the `stacked` keyword.
                Presumably supplied by a parameterized-test decorator that
                is outside this view.
        """
        env = gym.make('gfootball:GFootball-11_vs_11_easy_stochastic-SMM-v0',
                       stacked=stacked)
        # Release the environment even when a step or an assertion fails, so a
        # failing test case does not leak the underlying game instance.
        try:
            env.reset()
            for _ in range(10):
                obs, _, done, _ = env.step(env.action_space.sample())
                num_channels = obs.shape[-1]

                baseline_obs = tf.cast(np.array(obs), tf.float32)

                packed_obs = observation.PackedBitsObservation.observation(
                    env, obs)
                packed_obs = tf.convert_to_tensor(packed_obs)
                tpu_obs = observation.unpackbits(utils.tpu_encode(packed_obs))
                non_tpu_obs = observation.unpackbits(packed_obs)

                # baseline_obs has fewer than 16 channels, so the first
                # channels should correspond to baseline_obs ...
                self.assertAllEqual(baseline_obs,
                                    tpu_obs[..., :num_channels])
                self.assertAllEqual(baseline_obs,
                                    non_tpu_obs[..., :num_channels])
                # ... and all the remaining (padding) channels should be 0.
                self.assertAllEqual(
                    tf.math.reduce_sum(tpu_obs[..., num_channels:]), 0)
                self.assertAllEqual(
                    tf.math.reduce_sum(non_tpu_obs[..., num_channels:]), 0)

                if done:
                    env.reset()
        finally:
            env.close()
Exemple #2
0
    def _torso(self, unused_prev_action, env_output):
        """Turn the frame in `env_output` into a ReLU-activated feature tensor.

        Args:
            unused_prev_action: Not read by this method.
            env_output: A 3-element sequence; only its last element (the
                frame) is used.

        Returns:
            The result of `self._conv_to_linear` applied to the flattened
            convolutional features, passed through a ReLU.
        """
        _, _, packed_frame = env_output

        # Expand the packed frame and divide by 255 before the conv stacks.
        features = observation.unpackbits(packed_frame)
        features /= 255

        # Apply every stack in order, each consuming the previous output.
        for conv_stack in self._stacks:
            features = conv_stack(features)

        activated = tf.nn.relu(features)
        flattened = tf.keras.layers.Flatten()(activated)

        return tf.nn.relu(self._conv_to_linear(flattened))