예제 #1
0
 def create_discrete_action_masking_layer(all_logits, action_masks,
                                          action_size):
     """
     Build the masked sampling layer for multi-branch discrete actions.

     The concatenated logits and masks are cut into one column slice per branch.
     Every branch is passed through a softmax, offset by EPSILON, multiplied
     element-wise by its mask slice and divided by its row sum (axis 1), after
     which one action per branch is drawn with ``tf.multinomial``.

     :param all_logits: The concatenated unnormalized action probabilities for all branches
     :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
         (presumably 1 for an allowed action and 0 for a forbidden one; confirm with the caller)
     :param action_size: A list containing the number of possible actions for each branch
     :return: A 3-tuple made of the sampled actions (one column per branch,
         concatenated on axis 1), the per-branch normalized probabilities
         concatenated on axis 1, and the log of those probabilities (computed as
         ``log(p + EPSILON)``) concatenated on axis 1
     """
     # Column offsets delimiting each branch inside the concatenated tensors.
     offsets = [0] + list(np.cumsum(action_size))
     spans = list(zip(offsets[:-1], offsets[1:]))

     branches_logits = [all_logits[:, begin:end] for begin, end in spans]
     branch_masks = [action_masks[:, begin:end] for begin, end in spans]

     # Softmax per branch, shifted by EPSILON, then multiplied by the mask.
     masked_probs = []
     for logits, mask in zip(branches_logits, branch_masks):
         shifted_probs = tf.nn.softmax(logits) + EPSILON
         masked_probs.append(tf.multiply(shifted_probs, mask))

     # Re-normalize each branch by its own row sum after masking.
     normalized_probs = []
     for probs in masked_probs:
         row_total = tf.reduce_sum(probs, axis=1, keepdims=True)
         normalized_probs.append(tf.divide(probs, row_total))

     # tf.multinomial takes log-probabilities; draw one sample per branch.
     sampled_actions = [
         tf.multinomial(tf.log(probs + EPSILON), 1)
         for probs in normalized_probs
     ]
     output = tf.concat(sampled_actions, axis=1)

     all_probs = tf.concat(normalized_probs, axis=1)
     branch_log_probs = [
         tf.log(probs + EPSILON) for probs in normalized_probs
     ]
     all_log_probs = tf.concat(branch_log_probs, axis=1)
     return output, all_probs, all_log_probs
예제 #2
0
 def create_discrete_action_masking_layer(
     branches_logits: List[tf.Tensor],
     action_masks: tf.Tensor,
     action_size: List[int],
 ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
     """
     Build the masked sampling layer for multi-branch discrete actions.

     The mask is split per branch with ``ModelUtils.break_into_branches``. Every
     branch's logits are passed through a softmax, offset by EPSILON, multiplied
     element-wise by the branch mask and divided by the row sum (axis 1), after
     which one action per branch is drawn with ``tf.multinomial``.

     :param branches_logits: A List of the unnormalized action probabilities for each branch
     :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
         (presumably 1 for an allowed action and 0 for a forbidden one; confirm with the caller)
     :param action_size: A list containing the number of possible actions for each branch
     :return: A 3-tuple made of the sampled actions (one column per branch,
         concatenated on axis 1), the per-branch normalized probabilities
         concatenated on axis 1, and the log of those probabilities (computed as
         ``log(p + EPSILON)``) concatenated on axis 1
     """
     branch_masks = ModelUtils.break_into_branches(action_masks,
                                                   action_size)
     # The number of branches is driven by action_size, not by the input lists.
     branch_ids = range(len(action_size))

     # Softmax per branch, shifted by EPSILON, then multiplied by the mask.
     masked_probs = []
     for branch in branch_ids:
         shifted_probs = tf.nn.softmax(branches_logits[branch]) + EPSILON
         masked_probs.append(tf.multiply(shifted_probs, branch_masks[branch]))

     # Re-normalize each branch by its own row sum after masking.
     normalized_probs = []
     for probs in masked_probs:
         row_total = tf.reduce_sum(probs, axis=1, keepdims=True)
         normalized_probs.append(tf.divide(probs, row_total))

     # tf.multinomial takes log-probabilities; draw one sample per branch.
     sampled_actions = [
         tf.multinomial(tf.log(probs + EPSILON), 1)
         for probs in normalized_probs
     ]
     output = tf.concat(sampled_actions, axis=1)

     all_probs = tf.concat(normalized_probs, axis=1)
     branch_log_probs = [
         tf.log(probs + EPSILON) for probs in normalized_probs
     ]
     all_log_probs = tf.concat(branch_log_probs, axis=1)
     return output, all_probs, all_log_probs