Example #1
 def _non_streaming(self, inputs):
   outputs = super(Conv1DTranspose, self).call(inputs)
   # During training or non-streaming inference the input shape can be
   # dynamic, so crop the transposed conv output to the expected length.
   output_time_dim = tf.shape(inputs)[1] * self.strides[0]
   if self.crop_output:
     return outputs[:, 0:output_time_dim, :]
   else:
     return outputs
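The crop is needed because a transposed convolution with kernel_size > strides
produces more output frames than time_dim * stride. A minimal standalone
sketch (the layer parameters here are illustrative, not from the library):

import tensorflow as tf

x = tf.zeros([1, 10, 4])  # [batch, time, features]
layer = tf.keras.layers.Conv1DTranspose(filters=4, kernel_size=5, strides=2)
y = layer(x)
print(y.shape)  # (1, 23, 4): (10 - 1) * 2 + 5 frames with 'valid' padding
cropped = y[:, 0:tf.shape(x)[1] * 2, :]  # crop to time_dim * stride
print(cropped.shape)  # (1, 20, 4)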
Example #2
import tensorflow as tf


def random_stretch_squeeze(inputs,
                           resample_offset,
                           seed=None):
  """Stretches and squeezes audio data in time dim.

  It can be useful for augmenting training data
  with random stretchs squeezes in time dim
  for making model more robust to input audio sampling frequency
  and human speech frequency.

  Args:
    inputs: input tensor [batch_size, time]
    resample_offset: defines stretch squeeze range:
      1-resample_offset...1+resample_offset
    seed: random seed
  Returns:
    stretched/squeezed tensor [batch_size, time]
  Raises:
    ValueError: if inputs.shape.rank != 2
  """
  if inputs.shape.rank != 2:
    raise ValueError('inputs.shape.rank:%d must be 2' % inputs.shape.rank)

  inputs_shape = inputs.shape.as_list()
  batch_size = inputs_shape[0]
  sequence_length = inputs_shape[1]

  image = tf.expand_dims(inputs, 2)  # feature
  image = tf.expand_dims(image, 3)  # channels

  resample = 1.0  # a value of 1.0 means no stretching or squeezing
  time_stretch_squeeze = tf.random.uniform(
      shape=[batch_size],
      minval=resample - resample_offset,
      maxval=resample + resample_offset,
      dtype=tf.float32,
      seed=seed)
  shape = tf.shape(inputs)
  outputs = tf.TensorArray(inputs.dtype, 0, dynamic_size=True)
  for i in tf.range(batch_size):
    # Resample the time dim by this example's stretch/squeeze factor.
    new_time_size = tf.cast(
        tf.cast(shape[1], tf.float32) * time_stretch_squeeze[i], tf.int32)
    image_resized = tf.image.resize(
        images=image[i],
        size=(new_time_size, 1),
        preserve_aspect_ratio=False)
    image_resized_cropped = tf.image.resize_with_crop_or_pad(
        image_resized,
        target_height=sequence_length,
        target_width=1,
    )

    outputs = outputs.write(i, image_resized_cropped)

  outputs = tf.squeeze(outputs.stack(), axis=[2, 3])
  outputs.set_shape(inputs_shape)
  return outputs
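A hypothetical usage sketch (the shapes and offset value are assumptions):
each example in the batch is resampled by an independent factor drawn from
[1 - resample_offset, 1 + resample_offset], then cropped or padded back to
the original length.

import tensorflow as tf

audio = tf.random.normal([2, 16000])  # [batch_size, time]
augmented = random_stretch_squeeze(audio, resample_offset=0.15, seed=1)
print(augmented.shape)  # (2, 16000): time dim restored by crop/pad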
Example #3
 def call(self, inputs):
     if not self.training or self.rate == 0:
         return inputs
     else:
         if self.noise_shape is None:
             self.noise_shape = tf.shape(inputs)
         # Drop units without the 1 / (1 - rate) rescaling applied by
         # standard dropout.
         noise_mask = tf.keras.backend.random_uniform(
             self.noise_shape, seed=self.seed) < (1 - self.rate)
         return inputs * tf.keras.backend.cast(noise_mask, tf.float32)
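For contrast, standard (inverted) dropout rescales kept units by
1 / (1 - rate) so the expected activation is unchanged; the layer above
deliberately skips that rescaling. A standalone sketch of just the masking
op (the rate and shapes are assumptions):

import tensorflow as tf

rate = 0.3
x = tf.ones([2, 4])
mask = tf.random.uniform(tf.shape(x), seed=0) < (1 - rate)
y = x * tf.cast(mask, tf.float32)  # kept units stay at 1.0, not 1.0 / (1 - rate)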
Example #4
 def _non_streaming(self, inputs):
     outputs = super(Conv2DTranspose, self).call(inputs)
     # During training or non-streaming inference the input shape can be
     # dynamic, so crop the transposed conv output to the expected length.
     output_time_dim = tf.shape(inputs)[1] * self.strides[0]
     if self.crop_output:
         if self.pad_time_dim == 'same':
             crop_left = self.overlap // 2
             return outputs[:, crop_left:crop_left + output_time_dim, :]
         else:
             return outputs[:, 0:output_time_dim, :]
     else:
         return outputs
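With 'same' time padding the surplus frames are split symmetrically, so the
crop starts at overlap // 2 instead of 0. A standalone sketch (the layer
parameters are illustrative; overlap is assumed to be
kernel_size[0] - strides[0]):

import tensorflow as tf

x = tf.zeros([1, 8, 3, 2])  # [batch, time, feature, channels]
layer = tf.keras.layers.Conv2DTranspose(2, kernel_size=(5, 3), strides=(2, 1))
y = layer(x)      # time dim: (8 - 1) * 2 + 5 = 19
overlap = 5 - 2   # assumed: kernel_size[0] - strides[0]
crop_left = overlap // 2
out = y[:, crop_left:crop_left + 8 * 2, :]
print(out.shape)  # (1, 16, 5, 2)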
Example #5
    def call(self, inputs, training=None):

        if self.rate == 0.0:
            return inputs

        if training is None:
            training = tf.keras.backend.learning_phase()

        if self.noise_shape is None:
            self.noise_shape = tf.shape(inputs)

        # smart_cond runs the branch directly when `training` is a Python
        # bool and only builds a graph cond when it is a symbolic tensor.
        return control_flow_util.smart_cond(
            training, lambda: self._non_scaling_drop_op(inputs),
            lambda: array_ops.identity(inputs))
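The snippet depends on TensorFlow-internal modules (control_flow_util and
array_ops) rather than the public API. A sketch of the same routing with
public ops only; _non_scaling_drop_op is the layer's own helper referenced
above:

import tensorflow as tf

def call(self, inputs, training=None):
    if self.rate == 0.0:
        return inputs
    if training is None:
        training = tf.keras.backend.learning_phase()
    # tf.cond accepts a Python bool or a scalar bool tensor.
    return tf.cond(tf.cast(training, tf.bool),
                   lambda: self._non_scaling_drop_op(inputs),
                   lambda: tf.identity(inputs))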
Example #6
    def _non_streaming(self, inputs):
        # transposed conv is a special case
        if isinstance(self.get_core_layer(), tf.keras.layers.Conv2DTranspose):
            outputs = self.cell(inputs)

            # During training or non-streaming inference the input shape can
            # be dynamic, so crop the transposed conv output to the expected
            # length.
            self.output_time_dim = tf.shape(inputs)[1] * self.stride
            if self.transposed_conv_crop_output:
                if self.pad_time_dim == 'same':
                    crop_left = self.ring_buffer_size_in_time_dim // 2
                    return outputs[:, crop_left:crop_left +
                                   self.output_time_dim, :]
                else:
                    return outputs[:, 0:self.output_time_dim, :]
            else:
                return outputs
        else:
            # Pad inputs in time dim: causal or same
            if self.pad_time_dim:
                if isinstance(self.cell,
                              (tf.keras.layers.Flatten,
                               tf.keras.layers.GlobalMaxPooling2D,
                               tf.keras.layers.GlobalAveragePooling2D)):
                    raise ValueError(
                        'pad_time_dim cannot be used with Flatten or global '
                        'pooling layers')

                # temporal padding
                pad = [[0, 0]] * inputs.shape.rank
                if self.use_one_step:
                    pad_total_amount = self.ring_buffer_size_in_time_dim - 1
                else:
                    pad_total_amount = self.ring_buffer_size_in_time_dim
                if self.pad_time_dim == 'causal':
                    pad[1] = [pad_total_amount, 0]
                elif self.pad_time_dim == 'same':
                    half = pad_total_amount // 2
                    pad[1] = [half, pad_total_amount - half]
                inputs = tf.pad(inputs, pad, 'constant')

            return self.cell(inputs)
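The padding branch in isolation: 'causal' puts all of the padding on the left
of the time dim, while 'same' splits it across both sides. A standalone
sketch with assumed values (ring buffer size 5, use_one_step=True):

import tensorflow as tf

inputs = tf.zeros([1, 10, 4])  # [batch, time, features]
pad_total_amount = 5 - 1       # ring_buffer_size_in_time_dim - 1

pad = [[0, 0]] * inputs.shape.rank
pad[1] = [pad_total_amount, 0]  # 'causal': left side only
print(tf.pad(inputs, pad, 'constant').shape)  # (1, 14, 4)

pad = [[0, 0]] * inputs.shape.rank
half = pad_total_amount // 2
pad[1] = [half, pad_total_amount - half]  # 'same': split across both sides
print(tf.pad(inputs, pad, 'constant').shape)  # (1, 14, 4)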
Example #7
import tensorflow as tf


def random_cutout(
    inputs,
    mask_size,
    mask_value=0,
    seed=None,
    data_format='channels_last',
):
  """Applies cutout (https://arxiv.org/abs/1708.04552) to inputs.

  It is based on addons/tensorflow_addons/image/cutout_ops.py
  kept here here for backward compatibility

  Args:
    inputs: input tensor [batch_size, time, feature, channels]
    mask_size: mask size (time feature)
    mask_value: mask will be filled with this value
    seed: random seed
    data_format: dimesnions order
  Returns:
    masked image
  Raises:
    ValueError: if inputs.shape.rank != 4
  """

  if inputs.shape.rank != 4:
    raise ValueError('inputs.shape.rank:%d must be 4' % inputs.shape.rank)

  mask_size = tf.convert_to_tensor(mask_size)
  if tf.rank(mask_size) == 0:
    mask_size = tf.stack([mask_size, mask_size])

  if data_format == 'channels_last':
    time_size, feature_size = tf.shape(inputs)[1], tf.shape(inputs)[2]
  else:
    time_size, feature_size = tf.shape(inputs)[2], tf.shape(inputs)[3]

  batch_size = tf.shape(inputs)[0]

  cutout_center_time = tf.random.uniform(
      shape=[batch_size], minval=0, maxval=time_size, dtype=tf.int32, seed=seed
  )
  cutout_center_feature = tf.random.uniform(
      shape=[batch_size],
      minval=0,
      maxval=feature_size,
      dtype=tf.int32,
      seed=seed)
  offset = tf.transpose([cutout_center_time, cutout_center_feature], [1, 0])
  origin_shape = inputs.shape
  offset = tf.convert_to_tensor(offset)
  # Half sizes: the mask extends mask_size // 2 on each side of its center.
  mask_size = mask_size // 2
  cutout_center_time = offset[:, 0]
  cutout_center_feature = offset[:, 1]

  lower_pads = tf.maximum(0, cutout_center_time - mask_size[0])
  upper_pads = tf.maximum(0, time_size - cutout_center_time - mask_size[0])
  left_pads = tf.maximum(0, cutout_center_feature - mask_size[1])
  right_pads = tf.maximum(0,
                          feature_size - cutout_center_feature - mask_size[1])

  cutout_shape = tf.transpose(
      [
          time_size - (lower_pads + upper_pads),
          feature_size - (left_pads + right_pads),
      ],
      [1, 0],
  )
  masks = tf.TensorArray(inputs.dtype, 0, dynamic_size=True)
  for i in tf.range(tf.shape(cutout_shape)[0]):
    padding_dims = [
        [lower_pads[i], upper_pads[i]],
        [left_pads[i], right_pads[i]],
    ]
    mask = tf.pad(
        tf.zeros(cutout_shape[i], dtype=inputs.dtype),
        padding_dims,
        constant_values=1,
    )
    masks = masks.write(i, mask)

  if data_format == 'channels_last':
    mask = tf.expand_dims(masks.stack(), -1)
    mask = tf.tile(mask, [1, 1, 1, tf.shape(inputs)[-1]])
  else:
    mask = tf.expand_dims(masks.stack(), 1)
    mask = tf.tile(mask, [1, tf.shape(inputs)[1], 1, 1])

  inputs = tf.where(
      tf.equal(mask, 0),
      tf.ones_like(inputs, dtype=inputs.dtype) * mask_value,
      inputs,
  )
  inputs.set_shape(origin_shape)
  return inputs
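A hypothetical usage sketch (the shapes and mask size are assumptions): each
example in the batch gets one mask_size rectangle, centered at a random
(time, feature) position, clipped at the edges and filled with mask_value.

import tensorflow as tf

spec = tf.random.normal([2, 100, 40, 1])  # [batch, time, feature, channels]
masked = random_cutout(spec, mask_size=(10, 8), mask_value=0.0, seed=7)
print(masked.shape)  # (2, 100, 40, 1)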