Code Example #1
File: tf_policy.py Project: junpilan/ml-agents
 def _create_encoder(
     self,
     visual_in: List[tf.Tensor],
     vector_in: tf.Tensor,
     h_size: int,
     num_layers: int,
     vis_encode_type: EncoderType,
 ) -> tf.Tensor:
     """
     Creates an encoder for visual and vector observations.
     :param h_size: Size of hidden linear layers.
     :param num_layers: Number of hidden linear layers.
     :param vis_encode_type: Type of visual encoder to use if visual input.
     :return: The hidden layer (tf.Tensor) after the encoder.
     """
     with tf.variable_scope("policy"):
         encoded = ModelUtils.create_observation_streams(
             self.visual_in,
             self.processed_vector_in,
             1,
             h_size,
             num_layers,
             vis_encode_type,
         )[0]
     return encoded
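ModelUtils.create_observation_streams is what actually builds the encoder here. As a point of reference only, the following is a minimal, self-contained sketch of that kind of observation encoder, assuming a vector-only observation; the layer stack, swish activation, and names are illustrative assumptions rather than the ml-agents implementation.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()


def simple_vector_encoder(vector_in, h_size, num_layers):
    # Stand-in for an observation-stream encoder: a stack of dense layers
    # over the vector observation, created under a "policy" variable scope.
    hidden = vector_in
    with tf.variable_scope("policy"):
        for i in range(num_layers):
            hidden = tf.layers.dense(
                hidden, h_size, activation=tf.nn.swish, name="hidden_%d" % i
            )
    return hidden


# Hypothetical usage with an 8-dimensional vector observation.
vector_in = tf.placeholder(shape=[None, 8], dtype=tf.float32, name="vector_observation")
encoded = simple_vector_encoder(vector_in, h_size=128, num_layers=2)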
Code Example #2
 def _create_observation_in(self, vis_encode_type):
     """
     Creates the observation inputs, and a CNN if needed,
     :param vis_encode_type: Type of CNN encoder.
     :param share_ac_cnn: Whether or not to share the actor and critic CNNs.
     :return A tuple of (hidden_policy, hidden_critic). We don't save it to self since they're used
     once and thrown away.
     """
     with tf.variable_scope(POLICY_SCOPE):
         hidden_streams = ModelUtils.create_observation_streams(
             self.policy.visual_in,
             self.policy.processed_vector_in,
             1,
             self.h_size,
             0,
             vis_encode_type=vis_encode_type,
             stream_scopes=["critic/value/"],
         )
     hidden_critic = hidden_streams[0]
     return hidden_critic
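The stream_scopes argument namespaces the variables created for this stream (here under "critic/value/") so the critic encoder can be addressed separately from the rest of the policy graph. Below is a small sketch of how nested variable scopes prefix variable names; the scope and layer names are assumptions for illustration only.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# Nested variable scopes prefix every variable created inside them, which is
# what keeps critic variables separable from the actor's.
with tf.variable_scope("policy"):
    with tf.variable_scope("critic"):
        with tf.variable_scope("value"):
            hidden = tf.layers.dense(tf.zeros([1, 8]), 16, name="hidden_0")

for v in tf.trainable_variables():
    print(v.name)  # e.g. policy/critic/value/hidden_0/kernel:0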
Code Example #3
File: optimizer.py Project: junpilan/ml-agents
    def _create_cc_critic(
        self, h_size: int, num_layers: int, vis_encode_type: EncoderType
    ) -> None:
        """
        Creates Continuous control critic (value) network.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        :param vis_encode_type: The type of visual encoder to use.
        """
        hidden_stream = ModelUtils.create_observation_streams(
            self.policy.visual_in,
            self.policy.processed_vector_in,
            1,
            h_size,
            num_layers,
            vis_encode_type,
        )[0]

        if self.policy.use_recurrent:
            hidden_value, memory_value_out = ModelUtils.create_recurrent_encoder(
                hidden_stream,
                self.memory_in,
                self.policy.sequence_length_ph,
                name="lstm_value",
            )
            self.memory_out = memory_value_out
        else:
            hidden_value = hidden_stream

        self.value_heads, self.value = ModelUtils.create_value_heads(
            self.stream_names, hidden_value
        )
        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.policy.act_size)],
            dtype=tf.float32,
            name="old_probabilities",
        )

        self.old_log_probs = tf.reduce_sum(
            (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
        )
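When use_recurrent is set, ModelUtils.create_recurrent_encoder wraps the hidden stream in an LSTM. The sketch below is a simplified stand-in for that pattern, not the ml-agents function itself; the cell type, state packing, and fixed sequence length are assumptions. The flat batch is reshaped into sequences, the LSTM state is seeded from the incoming memory tensor, and the final state is returned as the new memory.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()


def simple_recurrent_encoder(input_state, memory_in, sequence_length, name="lstm_value"):
    # [batch * seq, features] -> [batch, seq, features]
    s_size = input_state.get_shape().as_list()[1]
    m_size = memory_in.get_shape().as_list()[1]
    half_point = m_size // 2
    lstm_input = tf.reshape(input_state, shape=[-1, sequence_length, s_size])
    with tf.variable_scope(name):
        cell = tf.nn.rnn_cell.LSTMCell(half_point)
        # Seed the LSTM state from the memory carried over between steps.
        initial_state = tf.nn.rnn_cell.LSTMStateTuple(
            memory_in[:, :half_point], memory_in[:, half_point:]
        )
        output, state = tf.nn.dynamic_rnn(cell, lstm_input, initial_state=initial_state)
    hidden = tf.reshape(output, shape=[-1, half_point])
    memory_out = tf.concat([state.c, state.h], axis=1)
    return hidden, memory_out


# Hypothetical usage: a 64-wide hidden stream, 32-wide memory, sequence length 4.
hidden_stream = tf.placeholder(shape=[None, 64], dtype=tf.float32)
memory_in = tf.placeholder(shape=[None, 32], dtype=tf.float32)
hidden_value, memory_out = simple_recurrent_encoder(hidden_stream, memory_in, 4)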
Code Example #4
    def _create_dc_critic(self, h_size: int, num_layers: int,
                          vis_encode_type: EncoderType) -> None:
        """
        Creates Discrete control critic (value) network.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        :param vis_encode_type: The type of visual encoder to use.
        """
        hidden_stream = ModelUtils.create_observation_streams(
            self.policy.visual_in,
            self.policy.processed_vector_in,
            1,
            h_size,
            num_layers,
            vis_encode_type,
        )[0]

        if self.policy.use_recurrent:
            hidden_value, memory_value_out = ModelUtils.create_recurrent_encoder(
                hidden_stream,
                self.memory_in,
                self.policy.sequence_length_ph,
                name="lstm_value",
            )
            self.memory_out = memory_value_out
        else:
            hidden_value = hidden_stream

        self.value_heads, self.value = ModelUtils.create_value_heads(
            self.stream_names, hidden_value)

        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.policy.act_size)],
            dtype=tf.float32,
            name="old_probabilities",
        )

        # Break old log probs into separate branches
        old_log_prob_branches = ModelUtils.break_into_branches(
            self.all_old_log_probs, self.policy.act_size)

        _, _, old_normalized_logits = ModelUtils.create_discrete_action_masking_layer(
            old_log_prob_branches, self.policy.action_masks,
            self.policy.act_size)

        action_idx = [0] + list(np.cumsum(self.policy.act_size))

        self.old_log_probs = tf.reduce_sum(
            (
                tf.stack(
                    [
                        -tf.nn.softmax_cross_entropy_with_logits_v2(
                            labels=self.policy.selected_actions[
                                :, action_idx[i] : action_idx[i + 1]
                            ],
                            logits=old_normalized_logits[
                                :, action_idx[i] : action_idx[i + 1]
                            ],
                        )
                        for i in range(len(self.policy.act_size))
                    ],
                    axis=1,
                )
            ),
            axis=1,
            keepdims=True,
        )
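The action_idx boundaries come from a cumulative sum over the branch sizes, so each discrete branch occupies a fixed column slice of the concatenated one-hot actions and normalized logits. A small worked example with hypothetical branch sizes:

import numpy as np

act_size = [3, 2]  # hypothetical: two discrete branches with 3 and 2 actions
action_idx = [0] + list(np.cumsum(act_size))  # -> [0, 3, 5]

# Branch i occupies columns action_idx[i]:action_idx[i + 1] of the
# concatenated action / logit tensors.
for i in range(len(act_size)):
    print("branch %d -> columns [%d, %d)" % (i, action_idx[i], action_idx[i + 1]))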
Code Example #5
    def __init__(
        self,
        policy,
        m_size=None,
        h_size=128,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        stream_names=None,
        vis_encode_type=EncoderType.SIMPLE,
    ):
        super().__init__(
            policy,
            m_size,
            h_size,
            normalize,
            use_recurrent,
            num_layers,
            stream_names,
            vis_encode_type,
        )
        with tf.variable_scope(TARGET_SCOPE):
            self.vector_in, self.visual_in = ModelUtils.create_input_placeholders(
                self.policy.behavior_spec.observation_shapes)
            if self.policy.normalize:
                normalization_tensors = ModelUtils.create_normalizer(
                    self.vector_in)
                self.update_normalization_op = normalization_tensors.update_op
                self.normalization_steps = normalization_tensors.steps
                self.running_mean = normalization_tensors.running_mean
                self.running_variance = normalization_tensors.running_variance
                self.processed_vector_in = ModelUtils.normalize_vector_obs(
                    self.vector_in,
                    self.running_mean,
                    self.running_variance,
                    self.normalization_steps,
                )
            else:
                self.processed_vector_in = self.vector_in
                self.update_normalization_op = None

            if self.policy.use_recurrent:
                self.memory_in = tf.placeholder(shape=[None, m_size],
                                                dtype=tf.float32,
                                                name="target_recurrent_in")
                self.value_memory_in = self.memory_in
            hidden_streams = ModelUtils.create_observation_streams(
                self.visual_in,
                self.processed_vector_in,
                1,
                self.h_size,
                0,
                vis_encode_type=vis_encode_type,
                stream_scopes=["critic/value/"],
            )
        if self.policy.use_continuous_act:
            self._create_cc_critic(hidden_streams[0],
                                   TARGET_SCOPE,
                                   create_qs=False)
        else:
            self._create_dc_critic(hidden_streams[0],
                                   TARGET_SCOPE,
                                   create_qs=False)
        if self.use_recurrent:
            self.memory_out = tf.concat(self.value_memory_out,
                                        axis=1)  # Needed for Barracuda to work
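ModelUtils.normalize_vector_obs applies the running mean and variance accumulated by the normalizer to the raw vector input before it reaches the encoder. Below is a simplified sketch of that kind of running-statistics normalization; the step offset and clipping range are assumptions, not necessarily the ml-agents constants.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()


def normalize_vector_obs_sketch(vector_obs, running_mean, running_variance, steps):
    # Standardize with the running statistics, then clip to a fixed range so
    # a few outlier observations cannot dominate the encoder input.
    normalized = (vector_obs - running_mean) / tf.sqrt(
        running_variance / (tf.cast(steps, tf.float32) + 1.0)
    )
    return tf.clip_by_value(normalized, -5.0, 5.0)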