Example #1
    def FProp(self, theta, inputs, paddings):
        """Apply global spatial pooling to inputs.

    Args:
      theta: A `.NestedMap` object containing weights' values of this layer and
        its children layers.
      inputs: The inputs tensor. It is expected to be of shape [batch, time,
        frequency, channel]. The time dimension corresponds to the height
        dimension as in images and the frequency dimension corresponds to the
        width dimension as in images.
      paddings: The paddings tensor. It is expected to be of shape [batch,
        time]. Defaults to None, which means there no paddings.

    Returns:
      outputs, out_paddings pair.
       - outputs: has shape [batch, 1, 1, channel].
       - out_paddings: None or has shape [batch, 1].
    """
        p = self.params
        assert p.pooling_type in ['MAX', 'AVG'], p.pooling_type
        b, t, f = py_utils.GetShape(inputs, ndims=3)

        if paddings is not None:
            paddings = py_utils.HasShape(paddings, [b, t])
            mask = 1.0 - paddings[..., tf.newaxis, tf.newaxis]
        else:
            mask = tf.ones([b, t, 1, 1], p.dtype)
        if p.pooling_type == 'AVG':
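            # Masked average: sum over unpadded positions, then divide by the
            # number of contributing elements (unpadded timesteps * frequency).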
            global_sum = tf.reduce_sum(inputs * mask,
                                       axis=[1, 2],
                                       keepdims=True)
            f = tf.cast(tf.convert_to_tensor(f), p.dtype)
            count = f * tf.reduce_sum(mask, axis=[1, 2], keepdims=True)
            out_feature = global_sum / tf.maximum(1.0, count)
        elif p.pooling_type == 'MAX':
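            # Fill padded positions with a large negative value (-0.7 *
            # dtype.max) rather than -inf, so that subsequent arithmetic avoids
            # the overflow/NaN issues -inf can cause.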
            large_negative = (tf.ones_like(inputs) * p.dtype.max *
                              tf.constant(-0.7, dtype=p.dtype))
            padded_inputs = tf.where_v2(mask > 0.0, inputs, large_negative)
            out_feature = tf.reduce_max(padded_inputs,
                                        axis=[1, 2],
                                        keepdims=True)
        if paddings is None:
            out_paddings = None
        else:
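            # The pooled output is padded only if every input timestep is
            # padded, hence the reduce_min over time.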
            out_paddings = tf.reduce_min(paddings, axis=1, keepdims=True)
            out_feature *= 1.0 - out_paddings[..., tf.newaxis, tf.newaxis]
        return out_feature, out_paddings
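
To see the masked pooling in isolation, here is a minimal standalone sketch of
the AVG branch using plain TensorFlow 2.x; the function name and the test
tensors are hypothetical, not part of the library above.

import tensorflow as tf

def global_masked_avg_pool(inputs, paddings):
    """inputs: [batch, time, freq, channel]; paddings: [batch, time] of 0/1."""
    mask = 1.0 - paddings[:, :, tf.newaxis, tf.newaxis]  # [batch, time, 1, 1]
    total = tf.reduce_sum(inputs * mask, axis=[1, 2], keepdims=True)
    # Each unpadded timestep contributes `freq` elements to the sum.
    freq = tf.cast(tf.shape(inputs)[2], inputs.dtype)
    count = freq * tf.reduce_sum(mask, axis=[1, 2], keepdims=True)
    return total / tf.maximum(1.0, count)  # [batch, 1, 1, channel]

x = tf.random.normal([2, 5, 4, 8])
pad = tf.constant([[0., 0., 0., 1., 1.],
                   [0., 0., 1., 1., 1.]])
print(global_masked_avg_pool(x, pad).shape)  # (2, 1, 1, 8)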
Example #2
        def _PaddedMaxFn(inp):
            """Apply padded max using reduce_max with paddings replaced by neginf."""
            # Replace all padded features with -inf.
            neginf_padding = tf.where(inp.padding > 0, -np.inf * inp.padding,
                                      inp.padding)
            features = inp.features + neginf_padding[..., tf.newaxis]
            features = tf.reduce_max(features, axis=-2)

            # Replace the features of all padded points with zeros. If every
            # point in a batch is padded, reduce_min over the padding will be
            # 1. We set the features to zero so that we don't get downstream
            # issues with NaNs. Note that inf * 0 = NaN.
            padding = tf.reduce_min(inp.padding, axis=-1)
            features = tf.where_v2(tf.cast(padding[..., tf.newaxis], tf.bool),
                                   tf.zeros_like(features), features)
            features = py_utils.CheckNumerics(features)

            if nested_output:  # `nested_output` comes from the enclosing scope.
                return py_utils.NestedMap(features=features, padding=padding)
            else:
                return features
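
The same padded-max trick can be reproduced outside the library with plain
TensorFlow; the sketch below assumes float 0/1 paddings and hypothetical
names, mirroring _PaddedMaxFn without the NestedMap plumbing.

import numpy as np
import tensorflow as tf

def padded_max(features, padding):
    """features: [batch, points, channels]; padding: [batch, points] of 0/1."""
    # Push padded points to -inf so that reduce_max ignores them.
    neginf = tf.where(padding > 0, -np.inf * padding, padding)
    pooled = tf.reduce_max(features + neginf[..., tf.newaxis], axis=-2)
    # Where every point is padded, the max is -inf; replace it with zeros.
    all_padded = tf.reduce_min(padding, axis=-1)[..., tf.newaxis] > 0
    return tf.where(all_padded, tf.zeros_like(pooled), pooled)

feats = tf.random.normal([2, 4, 3])
pad = tf.constant([[0., 0., 1., 1.],
                   [1., 1., 1., 1.]])  # second example is entirely padded
print(padded_max(feats, pad))  # second row is all zeros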