Example #1
    def call(self, inputs):

        if inputs.shape.rank < 2:
            raise ValueError('inputs.shape.rank: %d must be >= 2' %
                             inputs.shape.rank)

        if self.mode in [
                modes.Modes.STREAM_INTERNAL_STATE_INFERENCE,
                modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE
        ] or self.padding == 'valid' or self.padding_size == 0:
            # Padding is not applied in streaming mode, with 'valid'
            # padding, or when padding_size is 0.
            return inputs

        pad = [[0, 0]] * inputs.shape.rank

        if self.padding == 'causal':
            pad[1] = [self.padding_size, 0]
        elif self.padding == 'future':
            pad[1] = [0, self.padding_size]
        elif self.padding == 'same':
            half = (self.padding_size // 2 if self.padding_size >= 0 else
                    (self.padding_size + 1) // 2)
            pad[1] = [half, self.padding_size - half]

        if self.padding_size >= 0:
            inputs = tf.pad(inputs, pad, 'constant')
        else:  # Crop:
            crop_left = -pad[1][0]
            crop_right = -pad[1][1]
            if crop_right > 0:
                inputs = inputs[:, crop_left:-crop_right]
            else:
                inputs = inputs[:, crop_left:]
        return inputs
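
A minimal standalone sketch of the same padding arithmetic on a rank-3 [batch, time, features] tensor (toy values; illustrative only, not part of the original layer):

import tensorflow as tf

x = tf.reshape(tf.range(6, dtype=tf.float32), [1, 3, 2])  # [batch, time, features]
padding_size = 2

# 'causal': all padding goes on the left of the time dim.
causal = tf.pad(x, [[0, 0], [padding_size, 0], [0, 0]], 'constant')

# 'same': padding is split between left and right.
half = padding_size // 2
same = tf.pad(x, [[0, 0], [half, padding_size - half], [0, 0]], 'constant')

print(causal.shape, same.shape)  # (1, 5, 2) (1, 5, 2)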
Example #2
    def _get_expected_output(self, dilation_rate=(1, 1), stacked=False):
        # Pad the front to match the padding of the streamed version
        dilated_kernel_size = dilation_rate[0] * (self.kernel_size[0] - 1) + 1
        inputs_conv = np.pad(self.inputs,
                             ((0, 0), (dilated_kernel_size - 1, 0), (0, 0),
                              (0, 0)), 'constant')

        # Put through basic convolution layer
        layer = tf.keras.layers.Conv2D(self.filters,
                                       self.kernel_size,
                                       dilation_rate=dilation_rate,
                                       kernel_initializer='ones')
        inputs = tf.keras.layers.Input(
            shape=(self.time_dim + dilated_kernel_size - 1, self.feature_dim,
                   1),
            batch_size=self.batch_size)
        outputs = layer(inputs)

        # Stacking 2 convolutional layers on top of each other.
        if stacked:
            padded_outputs = tf.pad(outputs,
                                    ((0, 0), (dilated_kernel_size - 1, 0),
                                     (0, 0), (0, 0)), 'constant')

            layer = tf.keras.layers.Conv2D(self.filters,
                                           self.kernel_size,
                                           dilation_rate=dilation_rate,
                                           kernel_initializer='ones')
            outputs = layer(padded_outputs)

        model = tf.keras.Model(inputs, outputs)
        model_output = model.predict(inputs_conv)
        return model_output
Example #3
def frequeny_pad(inputs, dilation, stride, kernel_size):
  """Pads input tensor in frequency domain.

  Args:
    inputs: input tensor
    dilation: dilation in frequency dim
    stride: stride in frequency dim
    kernel_size: kernel_size in frequency dim

  Returns:
    padded tensor

  Raises:
    ValueError: if input rank is < 3
  """

  # expected input: [N, Time, Frequency, ...]
  if inputs.shape.rank < 3:
    raise ValueError('inputs.shape.rank: %d must be at least 3' %
                     inputs.shape.rank)

  kernel_size = (kernel_size - 1) * dilation + 1
  total_pad = kernel_size - stride

  pad_left = total_pad // 2
  pad_right = total_pad - pad_left

  pad = [[0, 0]] * inputs.shape.rank
  pad[2] = [pad_left, pad_right]
  return tf.pad(inputs, pad, 'constant')
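
A minimal usage sketch for the function above, assuming eager TF2 (toy values): with kernel_size=3 and dilation=2 the effective kernel is (3 - 1) * 2 + 1 = 5, so with stride=1 the total pad is 4, split as [2, 2] around the frequency dim.

import tensorflow as tf

x = tf.ones([1, 4, 10, 1])  # [batch, time, frequency, channels]
padded = frequeny_pad(x, dilation=2, stride=1, kernel_size=3)
print(padded.shape)  # (1, 4, 14, 1)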
Example #4
  def _non_streaming(self, inputs):
    # Zero pad inputs in the time dim, from the left, to make the
    # convolution causal.
    if self.pad_time_dim:
      if isinstance(self.cell, tf.keras.layers.Conv2D) or isinstance(
          self.cell, tf.keras.layers.DepthwiseConv2D):
        inputs = tf.pad(inputs, ((0, 0), (self.effective_ksize_tdim - 1, 0),
                                 (0, 0), (0, 0)), 'constant')
    return self.cell(inputs)
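
A worked check of the causal-pad amount used above (toy values; effective_ksize_tdim is assumed to be the dilated kernel size in the time dim):

kernel_size_tdim, dilation = 3, 2
effective_ksize_tdim = dilation * (kernel_size_tdim - 1) + 1  # = 5
# Left-padding by effective_ksize_tdim - 1 = 4 makes the convolution causal
# (no lookahead) and, at stride 1, keeps the output time length equal to the
# input time length.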
Example #5
def random_shift(inputs, time_shift, seed=None):
    """Shifts input data randomly in time dim.

  It can be useful for augmenting training data with random shifts in time dim
  for making model more robust to input audio shifts

  Args:
    inputs: input tensor [batch_size, time]
    time_shift: defines time shift range: -time_shift...time_shift
      it is defiend in samples
    seed: random seed
  Returns:
    masked image
  Raises:
    ValueError: if inputs.shape.rank != 2
  """
    if inputs.shape.rank != 2:
        raise ValueError('inputs.shape.rank:%d must be 2' % inputs.shape.rank)

    inputs_shape = inputs.shape.as_list()
    batch_size = inputs_shape[0]
    sequence_length = inputs_shape[1]

    # The ops below expect rank-3 input, so add a dummy dim: [batch, time, 1]
    inputs = tf.expand_dims(inputs, 2)

    time_shift_amounts = tf.random.uniform(shape=[batch_size],
                                           minval=-time_shift,
                                           maxval=time_shift,
                                           dtype=tf.int32,
                                           seed=seed)

    outputs = tf.TensorArray(inputs.dtype, 0, dynamic_size=True)
    for i in tf.range(batch_size):
        time_shift_amount = time_shift_amounts[i]

        # pylint: disable=cell-var-from-loop
        time_shift_padding = tf.cond(time_shift_amount > 0,
                                     lambda: [[time_shift_amount, 0], [0, 0]],
                                     lambda: [[0, -time_shift_amount], [0, 0]])
        time_shift_offset = tf.cond(time_shift_amount > 0, lambda: [0, 0],
                                    lambda: [-time_shift_amount, 0])
        # pylint: enable=cell-var-from-loop

        padded = tf.pad(tensor=inputs[i],
                        paddings=time_shift_padding,
                        mode='CONSTANT')
        padded_sliced = tf.slice(padded, time_shift_offset,
                                 [sequence_length, -1])

        outputs = outputs.write(i, padded_sliced)

    # convert it back to [batch, time]
    outputs = tf.squeeze(outputs.stack(), axis=[2])
    outputs.set_shape(inputs_shape)
    return outputs
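
A hedged usage sketch (toy batch; eager TF2 assumed):

import tensorflow as tf

audio = tf.reshape(tf.range(8, dtype=tf.float32), [2, 4])  # [batch, time]
shifted = random_shift(audio, time_shift=2, seed=1)
print(shifted.shape)  # (2, 4): same shape, samples shifted and zero padded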
Example #6
    def _non_streaming(self, inputs):
        # Pad inputs in time dim: causal or same
        if self.pad_time_dim:
            if isinstance(self.cell, tf.keras.layers.Flatten):
                raise ValueError('pad_time_dim cannot be used with Flatten')

            # temporal padding
            pad = [[0, 0]] * inputs.shape.rank
            if self.pad_time_dim == 'causal':
                pad[1] = [self.ring_buffer_size_in_time_dim - 1, 0]
            elif self.pad_time_dim == 'same':
                half = self.ring_buffer_size_in_time_dim // 2
                pad[1] = [half, half]
            inputs = tf.pad(inputs, pad, 'constant')

        return self.cell(inputs)
Example #7
    def _non_streaming(self, inputs):
        # transposed conv is a special case
        if isinstance(self.get_core_layer(), tf.keras.layers.Conv2DTranspose):
            outputs = self.cell(inputs)

            # During training or non-streaming inference, the input shape can
            # be dynamic.
            self.output_time_dim = tf.shape(inputs)[1] * self.stride
            if self.transposed_conv_crop_output:
                if self.pad_time_dim == 'same':
                    crop_left = self.ring_buffer_size_in_time_dim // 2
                    return outputs[:, crop_left:crop_left +
                                   self.output_time_dim, :]
                else:
                    return outputs[:, 0:self.output_time_dim, :]
            else:
                return outputs
        else:
            # Pad inputs in time dim: causal or same
            if self.pad_time_dim:
                if isinstance(self.cell,
                              (tf.keras.layers.Flatten,
                               tf.keras.layers.GlobalMaxPooling2D,
                               tf.keras.layers.GlobalAveragePooling2D)):
                    raise ValueError(
                        'pad_time_dim cannot be used with Flatten or '
                        'global pooling layers')

                # temporal padding
                pad = [[0, 0]] * inputs.shape.rank
                if self.use_one_step:
                    pad_total_amount = self.ring_buffer_size_in_time_dim - 1
                else:
                    pad_total_amount = self.ring_buffer_size_in_time_dim
                if self.pad_time_dim == 'causal':
                    pad[1] = [pad_total_amount, 0]
                elif self.pad_time_dim == 'same':
                    half = pad_total_amount // 2
                    pad[1] = [half, pad_total_amount - half]
                inputs = tf.pad(inputs, pad, 'constant')

            return self.cell(inputs)
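
A worked example of the pad amounts in the non-transposed branch above (illustrative values):

ring_buffer_size_in_time_dim = 3
use_one_step = True
pad_total_amount = (ring_buffer_size_in_time_dim - 1
                    if use_one_step else ring_buffer_size_in_time_dim)  # = 2
# 'causal' -> pad[1] = [2, 0]; 'same' -> pad[1] = [1, 1]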
Example #8
  def call(self, inputs):

    if inputs.shape.rank < 2:
      raise ValueError('inputs.shape.rank: %d must be >= 2' % inputs.shape.rank)

    if self.mode in [
        Modes.STREAM_INTERNAL_STATE_INFERENCE,
        Modes.STREAM_EXTERNAL_STATE_INFERENCE
    ] or self.padding == 'valid':
      # Padding is not applied in streaming mode or with 'valid' padding.
      return inputs

    pad = [[0, 0]] * inputs.shape.rank

    if self.padding == 'causal':
      pad[1] = [self.padding_size, 0]
    elif self.padding == 'same':
      half = self.padding_size // 2
      pad[1] = [half, half]

    inputs = tf.pad(inputs, pad, 'constant')
    return inputs
Example #9
  def _non_streaming(self, inputs):
    # Pad inputs in time dim: causal or same
    if self.pad_time_dim:
      if isinstance(
          self.cell,
          (tf.keras.layers.Flatten, tf.keras.layers.GlobalMaxPooling2D,
           tf.keras.layers.GlobalAveragePooling2D)):
        raise ValueError('pad_time_dim cannot be used with Flatten or '
                         'global pooling layers')

      # temporal padding
      pad = [[0, 0]] * inputs.shape.rank
      if self.use_one_step:
        pad_total_amount = self.ring_buffer_size_in_time_dim - 1
      else:
        pad_total_amount = self.ring_buffer_size_in_time_dim
      if self.pad_time_dim == 'causal':
        pad[1] = [pad_total_amount, 0]
      elif self.pad_time_dim == 'same':
        half = pad_total_amount // 2
        pad[1] = [half, pad_total_amount - half]
      inputs = tf.pad(inputs, pad, 'constant')

    return self.cell(inputs)
Example #10
  def _non_streaming(self, inputs):
    if self.also_in_non_streaming:
      return tf.pad(inputs, ((0, 0), (self.delay, 0),
                             (0, 0)))[:, :-self.delay, :]
    else:
      return inputs
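
A hedged sketch of the delay trick on a toy [batch, time, features] tensor: left-pad the time dim by `delay`, then drop the last `delay` frames, so the output lags the input.

import tensorflow as tf

delay = 2
x = tf.reshape(tf.range(5, dtype=tf.float32), [1, 5, 1])
delayed = tf.pad(x, ((0, 0), (delay, 0), (0, 0)))[:, :-delay, :]
print(tf.squeeze(delayed).numpy())  # [0. 0. 0. 1. 2.]: output lags input by 2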
Example #11
  def _non_streaming(self, inputs):
    if self.also_in_non_streaming:
      return tf.pad(inputs, ((0, 0), (self.delay, 0)) + ((0, 0),) *
                    (inputs.shape.rank - 2))[:, :-self.delay]
    else:
      return inputs
Example #12
def random_cutout(
    inputs,
    mask_size,
    mask_value=0,
    seed=None,
    data_format='channels_last',
):
  """Applies cutout (https://arxiv.org/abs/1708.04552) to inputs.

  It is based on addons/tensorflow_addons/image/cutout_ops.py,
  kept here for backward compatibility.

  Args:
    inputs: input tensor [batch_size, time, feature, channels]
    mask_size: mask size (time, feature)
    mask_value: mask will be filled with this value
    seed: random seed
    data_format: dimensions order
  Returns:
    masked image
  Raises:
    ValueError: if inputs.shape.rank != 4
  """

  if inputs.shape.rank != 4:
    raise ValueError('inputs.shape.rank:%d must be 4' % inputs.shape.rank)

  mask_size = tf.convert_to_tensor(mask_size)
  if tf.rank(mask_size) == 0:
    mask_size = tf.stack([mask_size, mask_size])

  if data_format == 'channels_last':
    time_size, feature_size = tf.shape(inputs)[1], tf.shape(inputs)[2]
  else:
    time_size, feature_size = tf.shape(inputs)[2], tf.shape(inputs)[3]

  batch_size = tf.shape(inputs)[0]

  cutout_center_time = tf.random.uniform(
      shape=[batch_size], minval=0, maxval=time_size, dtype=tf.int32, seed=seed
  )
  cutout_center_feature = tf.random.uniform(
      shape=[batch_size],
      minval=0,
      maxval=feature_size,
      dtype=tf.int32,
      seed=seed)
  offset = tf.transpose([cutout_center_time, cutout_center_feature], [1, 0])
  origin_shape = inputs.shape
  offset = tf.convert_to_tensor(offset)
  mask_size = mask_size // 2
  cutout_center_time = offset[:, 0]
  cutout_center_feature = offset[:, 1]

  lower_pads = tf.maximum(0, cutout_center_time - mask_size[0])
  upper_pads = tf.maximum(0, time_size - cutout_center_time - mask_size[0])
  left_pads = tf.maximum(0, cutout_center_feature - mask_size[1])
  right_pads = tf.maximum(0,
                          feature_size - cutout_center_feature - mask_size[1])

  cutout_shape = tf.transpose(
      [
          time_size - (lower_pads + upper_pads),
          feature_size - (left_pads + right_pads),
      ],
      [1, 0],
  )
  masks = tf.TensorArray(inputs.dtype, 0, dynamic_size=True)
  for i in tf.range(tf.shape(cutout_shape)[0]):
    padding_dims = [
        [lower_pads[i], upper_pads[i]],
        [left_pads[i], right_pads[i]],
    ]
    mask = tf.pad(
        tf.zeros(cutout_shape[i], dtype=inputs.dtype),
        padding_dims,
        constant_values=1,
    )
    masks = masks.write(i, mask)

  if data_format == 'channels_last':
    mask = tf.expand_dims(masks.stack(), -1)
    mask = tf.tile(mask, [1, 1, 1, tf.shape(inputs)[-1]])
  else:
    mask = tf.expand_dims(masks.stack(), 1)
    mask = tf.tile(mask, [1, tf.shape(inputs)[1], 1, 1])

  inputs = tf.where(
      tf.equal(mask, 0),
      tf.ones_like(inputs, dtype=inputs.dtype) * mask_value,
      inputs,
  )
  inputs.set_shape(origin_shape)
  return inputs
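
A hedged usage sketch (toy spectrogram batch; eager TF2 assumed):

import tensorflow as tf

spec = tf.ones([2, 16, 8, 1])  # [batch, time, feature, channels]
masked = random_cutout(spec, mask_size=(4, 4), seed=1)
print(masked.shape)  # (2, 16, 8, 1): an up-to-4x4 region per example set to 0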