def call(self, *args: tf.Tensor, **kwargs: tf.Tensor) -> tf.Tensor:
        """Turn the input tensors into a signal tensor.

        The positional and keyword inputs are first mapped to a dictionary of
        controls by `self.get_controls`, which is then expanded as keyword
        arguments into `self.get_signal`.

        Args:
          *args: Positional inputs, forwarded unchanged to `get_controls`.
          **kwargs: Keyword inputs, forwarded to `get_controls` after the
            `training` and `mask` entries have been removed.

        Returns:
          Whatever `self.get_signal` returns for the computed controls.
        """
        # `training` and `mask` are injected by keras.Layer; this layer does
        # not use them, so drop them before they reach `get_controls`.
        kwargs.pop('training', None)
        kwargs.pop('mask', None)

        return self.get_signal(**self.get_controls(*args, **kwargs))
Esempio n. 2
0
def save_to_tfrecord(input_frame: tf.Tensor,
                     input_states: Mapping[str, tf.Tensor], frame_index: int,
                     predictions: tf.Tensor, output_states: Mapping[str,
                                                                    tf.Tensor],
                     groundtruth_label_id: tf.Tensor, output_dataset_dir: str,
                     file_index: int) -> None:
    """Write one frame's inputs, predictions and states to a TFRecord file.

    A single example is written to
    `<output_dataset_dir>/movinet_stream_<file_index:06d>.tfrecords`. The
    tensors are read with `.numpy()`, so this must run eagerly.

    Args:
      input_frame: The frame to store as a PNG. It is scaled by 255, cast to
        uint8 and squeezed on axes 0 and 1 (presumably a [1, 1, H, W, C]
        tensor with values in [0, 1] -- confirm against the caller).
      input_states: States fed to the model for this frame. Must contain every
        key present in `output_states`.
      frame_index: Index of the frame, stored under `frame_id`.
      predictions: Model predictions, stored flattened as floats.
      output_states: States produced by the model for this frame. Only
        `tf.int32` and `tf.float32` states are supported.
      groundtruth_label_id: Ground-truth label id(s), stored flattened as
        int64.
      output_dataset_dir: Directory the record file is written to.
      file_index: Index used in the record file name.

    Raises:
      ValueError: If an output state has a dtype other than int32 or float32.
    """

    def _flat_values(tensor):
        # Feature builders take flat Python lists.
        return tensor.numpy().flatten().tolist()

    image_string = tf.io.encode_png(
        tf.squeeze(tf.cast(input_frame * 255., tf.uint8), axis=[0, 1]))
    features = {
        'frame_id': _int64_feature([frame_index]),
        'groundtruth_label': _int64_feature(
            _flat_values(groundtruth_label_id)),
        'predictions': _float_feature(_flat_values(predictions)),
        'image': _bytes_feature(image_string.numpy()),
    }

    # Input/Output states at time T.
    for key, output_state in output_states.items():
        # Read the dtype from the tensor itself rather than from
        # `output_state[0]`: this avoids an extra slice and also works for
        # rank-0 states.
        dtype = output_state.dtype
        if dtype == tf.int32:
            make_feature = _int64_feature
        elif dtype == tf.float32:
            make_feature = _float_feature
        else:
            raise ValueError(f'Unrecognized dtype: {dtype}')
        features['input/' + key] = make_feature(
            _flat_values(input_states[key]))
        features['output/' + key] = make_feature(_flat_values(output_state))

    tfe = _build_tf_example(features)
    record_file = '{}/movinet_stream_{:06d}.tfrecords'.format(
        output_dataset_dir, file_index)
    logging.info('Saving to %s.', record_file)
    with tf.io.TFRecordWriter(record_file) as writer:
        writer.write(tfe)
Esempio n. 3
0
def standardize_image(
    image_bytes: tf.Tensor,
    stddev: Tuple[float, ...],
    num_channels: int = 3,
    dtype: tf.dtypes.DType = tf.float32,
) -> tf.Tensor:
  """Divides each image channel by the corresponding stddev value.

  For example:
    stddev = [123.68, 116.779, 103.939]
    image_bytes = standardize_image(image_bytes, stddev)

  Note that the rank of `image_bytes` must be known.

  Args:
    image_bytes: a tensor of size [height, width, C].
    stddev: a C-vector of values to divide each channel by.
    num_channels: the expected number of values in `stddev`.
    dtype: the dtype to convert the image and `stddev` to before dividing.
      Set to `None` to skip conversion, in which case `image_bytes` must
      already have a dtype that can be divided by the broadcast `stddev`.

  Returns:
    the image with every channel divided by its stddev.

  Raises:
    ValueError: If the rank of `image_bytes` is unknown or is not three, or
      if the number of values in `stddev` does not equal `num_channels`.
  """
  if image_bytes.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')

  if len(stddev) != num_channels:
    raise ValueError('len(stddev) must match the number of channels')

  # Convert the image as documented. Casting only `stddev` made the division
  # fail whenever the image had a different dtype (e.g. uint8 input with the
  # default float32 `dtype`). The cast is a no-op if the dtype already matches.
  if dtype is not None:
    image_bytes = tf.cast(image_bytes, dtype=dtype)

  # We have a 1-D tensor of stddev; convert to 3-D.
  # Note(b/130245863): we explicitly call `broadcast` instead of simply
  # expanding dimensions for better performance.
  stddev = tf.broadcast_to(stddev, tf.shape(image_bytes))
  if dtype is not None:
    stddev = tf.cast(stddev, dtype=dtype)

  return image_bytes / stddev
Esempio n. 4
0
    def call(self, inputs: tf.Tensor) -> tf.Tensor:
        """Forward pass of the model.

        Takes as input a sound processed by CARFAC, FFT, and a power
        calculation resulting in a tensor of shape
        [bsz, CARFAC channels, frequency bins, 2] where 2 is signal power and
        noise power in dB.

        Two paths exist, selected by `self.use_channels`:
          * False: axis 1 is squeezed away (so it must have size 1) and the
            result goes through `bilstm`, `dropout` and `hidden_to_logits`.
          * True: the last two entries along the channel axis are split off
            and fed to `bilstm_two`; the remaining channels go through
            `features_to_hidden` and `bilstm_one`; both results are
            concatenated before `hidden_to_logits`.

        Args:
          inputs: a batch of inputs [bsz, CARFAC channels, frequency bins, 2].
            The static shape must have rank 4.

        Returns:
          A single tensor with the loudness predictions. If the result has
          rank 1 a leading axis of size 1 is added before returning.
          NOTE(review): earlier documentation and the previous return
          annotation described a (predictions, soft mask) tuple, but only one
          tensor is returned by this code.
        """
        # Static shape of the input; the batch dimension is not needed.
        (_, channels, bins, feature_dim) = inputs.get_shape().as_list()
        if not self.use_channels:
            # Remove the channel axis, which must have size 1 here.
            # Presumably [bsz, 1, bins, dim] -> [bsz, bins, dim] -- TODO confirm.
            inputs = tf.squeeze(inputs, axis=1)

            # Presumably [bsz, bins, dim] -> [bsz, hidden_dim].
            hidden = self.bilstm(inputs)
            hidden = self.dropout(hidden)

            # Presumably [bsz, hidden_dim] -> [bsz, 1].
            output = self.relu(self.hidden_to_logits(hidden))

            # Presumably [bsz, 1] -> [bsz]. A rank-1 result is expanded again
            # at the end of this method, on axis 0 rather than axis 1.
            output = tf.squeeze(output, axis=1)
        else:
            # [bsz, channels, bins, dim] -> [bsz, bins, channels, dim]
            inputs = tf.transpose(inputs, perm=[0, 2, 1, 3])

            # Last channel; the integer index drops the channel axis:
            # [bsz, bins, dim]
            conditioning = inputs[:, :, -1, :]

            # Second-to-last channel: [bsz, bins, dim]
            coefs = inputs[:, :, -2, :]

            # Append the first bin of `conditioning` along axis 1.
            # NOTE(review): `conditioning[:, 0]` is [bsz, dim] and becomes
            # [bsz, dim, 1] after expand_dims, so this concat only lines up
            # with `coefs` when dim == 1, giving [bsz, bins + 1, 1] -- confirm
            # this is the intended input layout.
            coefs = tf.concat(
                [coefs, tf.expand_dims(conditioning[:, 0], axis=2)], axis=1)

            # Presumably [bsz, bins + 1, 1] -> [bsz, hidden_dim].
            hidden_two = self.bilstm_two(coefs)

            # All channels except the last two:
            # [bsz, bins, channels - 2, dim]
            inputs = inputs[:, :, :-2, :]
            inputs = self.dropout(inputs)

            # Merge the channel and feature axes:
            # [bsz, bins, channels - 2, dim] -> [bsz, bins, (channels - 2) * dim]
            inputs = tf.reshape(inputs,
                                [-1, bins, (channels - 2) * feature_dim])

            # Presumably [bsz, bins, (channels - 2) * dim] -> [bsz, bins, hidden_dim].
            hidden = self.features_to_hidden(inputs)

            # Presumably [bsz, bins, hidden_dim] -> [bsz, hidden_dim].
            hidden_one = self.bilstm_one(hidden)
            # NOTE(review): this dropout result is overwritten by the concat
            # below before it is read, so it never reaches the output.
            hidden = self.dropout(hidden)

            # Join both branches on the feature axis:
            # presumably [bsz, hidden_dim] x 2 -> [bsz, hidden_dim + hidden_dim].
            hidden = tf.concat([hidden_one, hidden_two], axis=1)

            # Presumably [bsz, hidden_dim + hidden_dim] -> [bsz, 1].
            output = self.relu(self.hidden_to_logits(hidden))

        # A rank-1 output gets a leading axis of size 1.
        if len(output.get_shape().as_list()) == 1:
            output = tf.expand_dims(output, axis=0)

        return output
Esempio n. 5
0
 def encode(tokens: tf.Tensor):
     """Map a sequence of string tokens to their vocabulary IDs.

     Tokens that are not in `vocab` are mapped to `unk_idx`. The IDs are
     returned as a list wrapped in an outer single-element list.
     """
     token_ids = []
     for token in tokens.numpy():
         if token in vocab:
             token_ids.append(vocab[token])
         else:
             # Out-of-vocabulary token.
             token_ids.append(unk_idx)
     return [token_ids]
Esempio n. 6
0
 def tokenize(text: tf.Tensor):
     """Split `text` on whitespace.

     The tensor's value is read with `.numpy()` and passed to
     `whitespace_tokenize`; the result is returned wrapped in a
     single-element list.
     """
     raw_text = text.numpy()
     tokens = whitespace_tokenize(raw_text)
     return [tokens]
 def _check_value(self, tensor: tf.Tensor, tensorspec: tf.TensorSpec):
     """Validate that `tensor` matches the shape of `tensorspec`.

     The tensor's static shape is passed through `tf.squeeze` before it is
     compared against `tensorspec.shape`.

     Raises:
       ValueError: If the squeezed shape is not compatible with the spec.
     """
     squeezed_shape = tf.TensorShape(tf.squeeze(tensor.get_shape()))
     if squeezed_shape.is_compatible_with(tensorspec.shape):
         return
     message = 'Tensor {} is not compatible with specification {}.'.format(
         tensor, tensorspec)
     raise ValueError(message)
Esempio n. 8
0
def stateless_dropout(x: tf.Tensor,
                      rate: float,
                      seed: tf.Tensor,
                      noise_shape: Optional[Union[Sequence[int],
                                                  tf.TensorShape]] = None,
                      name: Optional[Text] = None) -> tf.Tensor:
    """Computes dropout: randomly sets elements to zero to prevent overfitting.

    See https://www.tensorflow.org/api_docs/python/tf/nn/dropout.
    This version differs in that the seed is required if the rate is nonzero.

    Kept elements are scaled by `1 / (1 - rate)`.

    Args:
      x: A floating point tensor.
      rate: A Python number or a scalar `Tensor` with the same type as x. The
        probability that each element is dropped. For example, setting
        rate=0.1 would drop 10% of input elements.
      seed: A shape [2] integer Tensor of seeds to the random number
        generator. Must have dtype `tf.int32` when compiling to XLA.
      noise_shape: A 1-D `Tensor` of type `int32`, representing the shape for
        randomly generated keep/drop flags.
      name: A name for this operation (optional).

    Returns:
      A `Tensor` of the same shape of `x`. If `rate` is statically known to
      be 0, `x` (converted to a tensor) is returned unchanged.

    Raises:
      ValueError: If `x` is not a floating point tensor, or if `rate` is a
        Python number outside `[0, 1)`. `rate=1` is disallowed, because the
        output would be all zeros, which is likely not what was intended.
        Tensor-valued rates are not range-checked here.
    """
    with tf.name_scope(name or 'stateless_dropout'):
        x = tf.convert_to_tensor(x, name='x')
        if not x.dtype.is_floating:
            raise ValueError(
                'x has to be a floating point tensor since it\'s going '
                'to be scaled. Got a %s tensor instead.' % x.dtype)
        if isinstance(rate, numbers.Real):
            if not 0 <= rate < 1:
                raise ValueError(
                    'rate must be a scalar tensor or a float in the '
                    'range [0, 1), got %g' % rate)
            if rate > 0.5:
                # A rate this high often means a keep probability was passed
                # by mistake; warn (at most 5 times) instead of failing.
                logging.log_first_n(
                    logging.WARN,
                    'Large dropout rate: %g (>0.5). In TensorFlow '
                    '2.x, dropout() uses dropout rate instead of keep_prob. '
                    'Please ensure that this is intended.', 5, rate)

        # Early return if nothing needs to be dropped.
        if tf.get_static_value(rate) == 0:
            return x

        rate = tf.convert_to_tensor(rate, dtype=x.dtype, name='rate')
        rate.shape.assert_has_rank(0)
        noise_shape = _get_noise_shape(x, noise_shape)
        # Sample a uniform distribution on [0.0, 1.0) and select values larger
        # than rate.
        #
        # NOTE: Random uniform actually can only generate 2^23 floats on
        # [1.0, 2.0) and subtract 1.0.
        random_tensor = tf.random.stateless_uniform(noise_shape,
                                                    seed=seed,
                                                    dtype=x.dtype)
        keep_prob = 1 - rate
        scale = 1 / keep_prob
        # NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider
        # that float to be selected, hence we use a >= comparison.
        keep_mask = random_tensor >= rate
        ret = x * scale * tf.cast(keep_mask, x.dtype)
        if not tf.executing_eagerly():
            # Restore the static shape of `x` on the result in graph mode.
            ret.set_shape(x.get_shape())
        return ret