Esempio n. 1
0
 def _create_encoder(
     self,
     visual_in: List[tf.Tensor],
     vector_in: tf.Tensor,
     h_size: int,
     num_layers: int,
     vis_encode_type: EncoderType,
 ) -> tf.Tensor:
     """
     Builds the observation encoder inside the "policy" variable scope.

     NOTE(review): ``visual_in`` and ``vector_in`` are accepted but never
     read; the encoder is always built from ``self.visual_in`` and
     ``self.processed_vector_in``. Confirm that callers pass those same
     tensors before relying on the arguments.

     :param visual_in: Visual observation tensors (currently unused, see note).
     :param vector_in: Vector observation tensor (currently unused, see note).
     :param h_size: Size of hidden linear layers.
     :param num_layers: Number of hidden linear layers.
     :param vis_encode_type: Type of visual encoder to use if visual input.
     :return: The hidden layer (tf.Tensor) after the encoder.
     """
     with tf.variable_scope("policy"):
         observation_streams = ModelUtils.create_observation_streams(
             self.visual_in,
             self.processed_vector_in,
             1,
             h_size,
             num_layers,
             vis_encode_type,
         )
         # The encoder output is the first entry of the returned streams.
         first_stream = observation_streams[0]
     return first_stream
Esempio n. 2
0
    def _create_dc_critic(self, h_size: int, num_layers: int,
                          vis_encode_type: EncoderType) -> None:
        """
        Builds the critic (value) network for discrete control.

        Sets ``self.value_heads``, ``self.value``, ``self.all_old_log_probs``
        and ``self.old_log_probs``; also sets ``self.memory_out`` when the
        policy is recurrent.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        :param vis_encode_type: The type of visual encoder to use.
        """
        encoded_obs = ModelUtils.create_observation_streams(
            self.policy.visual_in,
            self.policy.processed_vector_in,
            1,
            h_size,
            num_layers,
            vis_encode_type,
        )[0]

        # Without recurrence the encoded observations feed the value heads
        # directly; otherwise they first pass through the "lstm_value" encoder.
        value_input = encoded_obs
        if self.policy.use_recurrent:
            value_input, self.memory_out = ModelUtils.create_recurrent_encoder(
                encoded_obs,
                self.memory_in,
                self.policy.sequence_length_ph,
                name="lstm_value",
            )

        self.value_heads, self.value = ModelUtils.create_value_heads(
            self.stream_names, value_input)

        branch_sizes = self.policy.act_size
        # One column per possible action, all branches laid out side by side.
        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(branch_sizes)],
            dtype=tf.float32,
            name="old_probabilities",
        )
        _, _, masked_old_logits = ModelUtils.create_discrete_action_masking_layer(
            self.all_old_log_probs, self.policy.action_masks, branch_sizes)

        # Column boundaries of every action branch in the concatenated tensors.
        offsets = [0] + list(np.cumsum(branch_sizes))

        # Per branch: negated cross-entropy between the selected actions and
        # the old normalized logits (the old log-probability of the chosen
        # action, assuming selected_actions is one-hot per branch).
        branch_log_probs = []
        for start, end in zip(offsets[:-1], offsets[1:]):
            branch_labels = self.policy.selected_actions[:, start:end]
            branch_logits = masked_old_logits[:, start:end]
            branch_log_probs.append(
                -tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=branch_labels,
                    logits=branch_logits,
                )
            )

        # Sum across branches, keeping a trailing dimension of size one.
        self.old_log_probs = tf.reduce_sum(
            tf.stack(branch_log_probs, axis=1),
            axis=1,
            keepdims=True,
        )
Esempio n. 3
0
 def _create_observation_in(self, vis_encode_type):
     """
     Builds the critic's observation stream from the policy's inputs.

     The stream is created inside the POLICY_SCOPE variable scope under the
     "critic/value/" stream scope, using self.h_size and 0 for the
     layer-count argument.
     :param vis_encode_type: Type of CNN encoder.
     :return: The hidden critic tensor (first entry of the created streams).
     It is not saved to self since it is used once and thrown away.
     """
     with tf.variable_scope(POLICY_SCOPE):
         critic_streams = ModelUtils.create_observation_streams(
             self.policy.visual_in,
             self.policy.processed_vector_in,
             1,
             self.h_size,
             0,
             vis_encode_type=vis_encode_type,
             stream_scopes=["critic/value/"],
         )
     # Only the first stream is handed back to the caller.
     return critic_streams[0]
Esempio n. 4
0
    def _create_cc_critic(
        self, h_size: int, num_layers: int, vis_encode_type: EncoderType
    ) -> None:
        """
        Builds the critic (value) network for continuous control.

        Sets ``self.value_heads``, ``self.value``, ``self.all_old_log_probs``
        and ``self.old_log_probs``; also sets ``self.memory_out`` when the
        policy is recurrent.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        :param vis_encode_type: The type of visual encoder to use.
        """
        encoded_obs = ModelUtils.create_observation_streams(
            self.policy.visual_in,
            self.policy.processed_vector_in,
            1,
            h_size,
            num_layers,
            vis_encode_type,
        )[0]

        # Without recurrence the encoded observations feed the value heads
        # directly; otherwise they first pass through the "lstm_value" encoder.
        value_input = encoded_obs
        if self.policy.use_recurrent:
            value_input, self.memory_out = ModelUtils.create_recurrent_encoder(
                encoded_obs,
                self.memory_in,
                self.policy.sequence_length_ph,
                name="lstm_value",
            )

        self.value_heads, self.value = ModelUtils.create_value_heads(
            self.stream_names, value_input
        )

        # One column per action dimension.
        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.policy.act_size)],
            dtype=tf.float32,
            name="old_probabilities",
        )

        # Sum the fed-in values across columns, keeping a trailing dimension
        # of size one.
        old_log_probs_copy = tf.identity(self.all_old_log_probs)
        self.old_log_probs = tf.reduce_sum(
            old_log_probs_copy, axis=1, keepdims=True
        )
Esempio n. 5
0
    def __init__(
        self,
        policy,
        m_size=None,
        h_size=128,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        stream_names=None,
        vis_encode_type=EncoderType.SIMPLE,
    ):
        """
        Builds a critic network with its own input placeholders under
        TARGET_SCOPE.

        All arguments are first forwarded unchanged to the base-class
        constructor. The observation inputs (and, if enabled, normalization
        and recurrent-memory tensors) are then created inside the TARGET_SCOPE
        variable scope, and the critic itself is built by ``_create_cc_critic``
        or ``_create_dc_critic`` depending on ``policy.use_continuous_act``.

        NOTE(review): ``self.policy`` and ``self.h_size`` are read below but
        not assigned here; presumably the base-class constructor sets them.

        :param policy: Policy whose ``brain.camera_resolutions`` and
            ``vec_obs_size`` define the input placeholders.
        :param m_size: Second dimension of the recurrent memory placeholder.
        :param h_size: Forwarded to the base class.
        :param normalize: Forwarded to the base class (the branch below reads
            ``self.policy.normalize`` instead).
        :param use_recurrent: Forwarded to the base class (the memory
            placeholder below is gated on ``self.policy.use_recurrent``).
        :param num_layers: Forwarded to the base class.
        :param stream_names: Forwarded to the base class.
        :param vis_encode_type: Type of visual encoder used for the
            observation stream.
        """
        super().__init__(
            policy,
            m_size,
            h_size,
            normalize,
            use_recurrent,
            num_layers,
            stream_names,
            vis_encode_type,
        )
        with tf.variable_scope(TARGET_SCOPE):
            # This network gets its own observation inputs rather than reusing
            # the policy's tensors.
            self.visual_in = ModelUtils.create_visual_input_placeholders(
                policy.brain.camera_resolutions
            )
            self.vector_in = ModelUtils.create_vector_input(policy.vec_obs_size)
            if self.policy.normalize:
                # Keep handles to the normalizer state and feed the normalized
                # vector observations downstream.
                normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
                self.update_normalization_op = normalization_tensors.update_op
                self.normalization_steps = normalization_tensors.steps
                self.running_mean = normalization_tensors.running_mean
                self.running_variance = normalization_tensors.running_variance
                self.processed_vector_in = ModelUtils.normalize_vector_obs(
                    self.vector_in,
                    self.running_mean,
                    self.running_variance,
                    self.normalization_steps,
                )
            else:
                # No normalization: raw vector observations are used as-is.
                self.processed_vector_in = self.vector_in
                self.update_normalization_op = None

            if self.policy.use_recurrent:
                self.memory_in = tf.placeholder(
                    shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
                )
                # Same placeholder exposed under a second attribute name.
                self.value_memory_in = self.memory_in
            # Stream built with self.h_size and 0 for the layer-count argument,
            # under the "critic/value/" stream scope.
            hidden_streams = ModelUtils.create_observation_streams(
                self.visual_in,
                self.processed_vector_in,
                1,
                self.h_size,
                0,
                vis_encode_type=vis_encode_type,
                stream_scopes=["critic/value/"],
            )
        # The critic builders are called outside the `with` block and receive
        # TARGET_SCOPE explicitly; create_qs=False is passed in both cases.
        if self.policy.use_continuous_act:
            self._create_cc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
        else:
            self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
        # NOTE(review): this checks self.use_recurrent while the memory
        # placeholder above is gated on self.policy.use_recurrent — confirm the
        # two flags always agree. self.value_memory_out is presumably set by
        # the critic builder called above.
        if self.use_recurrent:
            self.memory_out = tf.concat(
                self.value_memory_out, axis=1
            )  # Needed for Barracuda to work