def standardize_image(
    image_bytes: tf.Tensor,
    stddev: Tuple[float, ...],
    num_channels: int = 3,
    dtype: tf.dtypes.DType = tf.float32,
) -> tf.Tensor:
    """Divides every channel of an image by its per-channel standard deviation.

    For example:
      stddev = [123.68, 116.779, 103.939]
      image_bytes = standardize_image(image_bytes, stddev)

    The static rank of `image_bytes` must be known.

    Args:
      image_bytes: a rank-3 tensor laid out as [height, width, C].
      stddev: one divisor per channel; its length must equal `num_channels`.
      num_channels: the number of channels `stddev` is expected to describe.
      dtype: dtype the broadcast `stddev` is cast to before dividing. Pass
        `None` to skip the cast. Only `stddev` is cast; `image_bytes` is used
        as given.

    Returns:
      `image_bytes` divided element-wise by the broadcast `stddev`.

    Raises:
      ValueError: If the static rank of `image_bytes` is not 3 (this includes
        an unknown rank), or if `len(stddev)` differs from `num_channels`.
    """
    static_rank = image_bytes.get_shape().ndims
    if static_rank != 3:
        raise ValueError('Input must be of size [height, width, C>0]')

    if num_channels != len(stddev):
        raise ValueError('len(stddev) must match the number of channels')

    # Expand the per-channel values to the full image shape.
    # Note(b/130245863): `broadcast_to` is called explicitly instead of simply
    # expanding dimensions, for better performance.
    divisor = tf.broadcast_to(stddev, tf.shape(image_bytes))
    if dtype is None:
        return image_bytes / divisor
    return image_bytes / tf.cast(divisor, dtype=dtype)
def call(self, inputs: tf.Tensor) -> tf.Tensor:
    """Forward pass of the model.

    Takes as input a sound processed by CARFAC, FFT, and a power calculation
    resulting in a tensor of shape [bsz, CARFAC channels, frequency bins, 2]
    where 2 is signal power and noise power in dB.

    Two paths exist, selected by `self.use_channels`:
      * disabled: the channel axis (axis 1) is squeezed away and the result is
        passed through `bilstm`, `dropout`, `hidden_to_logits` and `relu`.
      * enabled: the last two entries along the channel axis are split off as
        `conditioning` and `coefs` and fed to `bilstm_two`; the remaining
        channels go through `features_to_hidden` and `bilstm_one`; both
        results are concatenated before `hidden_to_logits` and `relu`.

    Shape comments on layer outputs below are the original author's notes;
    the layers are defined outside this method, so they cannot be verified
    here.

    Args:
      inputs: a batch of inputs [bsz, CARFAC channels, frequency bins, 2]

    Returns:
      A single tensor: the ReLU-activated output of `hidden_to_logits`, with a
      leading axis added if it would otherwise be rank 1.
      NOTE(review): earlier documentation described a tuple of (loudness
      predictions in phons per frequency bin, soft mask over the bins), but
      this code returns only one tensor -- confirm the intended contract.
    """
    # The static shape must have exactly four entries for this unpacking.
    (_, channels, bins, feature_dim) = inputs.get_shape().as_list()
    if not self.use_channels:
        # Drop the channel axis (it must have size 1 for this squeeze):
        # [bsz, 1, bins, dim] -> [bsz, bins, dim]
        inputs = tf.squeeze(inputs, axis=1)
        # [bsz, bins, 1] -> [bsz, hidden_dim]
        hidden = self.bilstm(inputs)
        hidden = self.dropout(hidden)
        # [bsz, hidden_dim] -> [bsz, 1]
        output = self.relu(self.hidden_to_logits(hidden))
        # [bsz, 1] -> [bsz]
        output = tf.squeeze(output, axis=1)
    else:
        # [bsz, channels, bins, dim] -> [bsz, bins, channels, dim]
        inputs = tf.transpose(inputs, perm=[0, 2, 1, 3])
        # Last channel entry; the integer index drops the channel axis:
        # [bsz, bins, dim]
        conditioning = inputs[:, :, -1, :]
        # Second-to-last channel entry: [bsz, bins, dim]
        coefs = inputs[:, :, -2, :]
        # `conditioning[:, 0]` is the first bin, [bsz, dim]; expanding axis 2
        # gives [bsz, dim, 1]. Concatenating on axis 1 with `coefs` only lines
        # up when dim == 1, yielding [bsz, bins + 1, 1].
        # NOTE(review): presumably dim is always 1 on this path -- confirm.
        coefs = tf.concat(
            [coefs, tf.expand_dims(conditioning[:, 0], axis=2)], axis=1)
        # [bsz, bins + 1, 1] -> [bsz, hidden_dim]
        hidden_two = self.bilstm_two(coefs)
        # Keep everything except the two channel entries split off above:
        # [bsz, bins, carfac_channels, dim], carfac_channels = channels - 2
        inputs = inputs[:, :, :-2, :]
        inputs = self.dropout(inputs)
        # [bsz, bins, carfac_channels, dim] -> [bsz, bins, carfac_channels * dim]
        inputs = tf.reshape(inputs, [-1, bins, (channels - 2) * feature_dim])
        # [bsz, bins, carfac_channels * dim] -> [bsz, bins, hidden_dim]
        hidden = self.features_to_hidden(inputs)
        # [bsz, bins, hidden_dim] -> [bsz, hidden_dim]
        hidden_one = self.bilstm_one(hidden)
        # NOTE(review): this dropout result is overwritten by the concat below
        # and never used -- confirm whether it was meant for `hidden_one`.
        hidden = self.dropout(hidden)
        # Disabled alternative (flatten instead of the second recurrent path):
        # (_, bins, feature_dim) = hidden.get_shape().as_list()
        # # [bsz, bins, hidden_dim] -> [bsz, bins * hidden_dim]
        # hidden_flat = tf.reshape(hidden, [-1, bins * feature_dim])
        # [bsz, hidden_dim] -> [bsz, hidden_dim + hidden_dim]
        hidden = tf.concat([hidden_one, hidden_two], axis=1)
        # [bsz, hidden_dim + hidden_dim] -> [bsz, 1]
        output = self.relu(self.hidden_to_logits(hidden))
    # A rank-1 result gets a leading axis of size 1 so the output is never
    # rank 1.
    # NOTE(review): the placement of this check at function level (rather
    # than inside the else branch) is inferred -- confirm against the file.
    if len(output.get_shape().as_list()) == 1:
        output = tf.expand_dims(output, axis=0)
    return output
def _check_value(self, tensor: tf.Tensor, tensorspec: tf.TensorSpec):
    """Raises if the tensor's shape does not fit the given specification.

    The tensor's static shape is passed through `tf.squeeze`, wrapped in a
    `tf.TensorShape`, and compared against `tensorspec.shape` with
    `is_compatible_with`.

    NOTE(review): `tf.squeeze` is applied to the shape itself, not to the
    tensor -- confirm that this is the intended way to ignore size-1 axes.

    Args:
      tensor: the tensor whose shape is validated.
      tensorspec: the specification the tensor is checked against.

    Raises:
      ValueError: If the derived shape is not compatible with
        `tensorspec.shape`.
    """
    squeezed_dims = tf.squeeze(tensor.get_shape())
    observed_shape = tf.TensorShape(squeezed_dims)
    if observed_shape.is_compatible_with(tensorspec.shape):
        return
    raise ValueError(
        'Tensor {} is not compatible with specification {}.'.format(
            tensor, tensorspec))
def stateless_dropout(x: tf.Tensor,
                      rate: float,
                      seed: tf.Tensor,
                      noise_shape: Optional[Union[Sequence[int],
                                                  tf.TensorShape]] = None,
                      name: Optional[Text] = None) -> tf.Tensor:
    """Computes dropout: randomly sets elements to zero to prevent overfitting.

    See https://www.tensorflow.org/api_docs/python/tf/nn/dropout.

    This version draws its randomness from `tf.random.stateless_uniform`, so
    an explicit `seed` must be supplied; it is only consumed when `rate` is
    nonzero.

    Args:
      x: A floating point tensor.
      rate: The probability that each element is dropped, as a Python number
        or a scalar tensor. For example, setting rate=0.1 would drop 10% of
        input elements. The range check is only applied to Python numbers.
      seed: A shape [2] integer Tensor of seeds to the random number
        generator. Must have dtype `tf.int32` when compiling to XLA.
      noise_shape: Optional shape (sequence of ints or `tf.TensorShape`) for
        the randomly generated keep/drop flags; resolved against `x` by
        `_get_noise_shape`.
      name: A name for this operation (optional).

    Returns:
      A `Tensor` of the same shape as `x`. Kept elements are scaled by
      `1 / (1 - rate)`; dropped elements are zero. `x` itself (converted to a
      tensor) is returned when `rate` is statically known to be 0.

    Raises:
      ValueError: If `rate` is a Python number outside `[0, 1)` or if `x` is
        not a floating point tensor. `rate=1` is disallowed, because the
        output would be all zeros, which is likely not what was intended.
    """
    with tf.name_scope(name or 'stateless_dropout'):
        x = tf.convert_to_tensor(x, name='x')
        if not x.dtype.is_floating:
            raise ValueError(
                'x has to be a floating point tensor since it\'s going '
                ' to be scaled. Got a %s tensor instead.' % x.dtype)

        if isinstance(rate, numbers.Real):
            # The chained comparison is False for NaN, so NaN is rejected too.
            if not 0 <= rate < 1:
                raise ValueError(
                    'rate must be a scalar tensor or a float in the '
                    'range [0, 1), got %g' % rate)
            if rate > 0.5:
                logging.log_first_n(
                    logging.WARN,
                    'Large dropout rate: %g (>0.5). In TensorFlow '
                    '2.x, dropout() uses dropout rate instead of keep_prob. '
                    'Please ensure that this is intended.', 5, rate)

        # Early return if nothing needs to be dropped.
        if tf.get_static_value(rate) == 0:
            return x

        rate = tf.convert_to_tensor(rate, dtype=x.dtype, name='rate')
        rate.shape.assert_has_rank(0)
        noise_shape = _get_noise_shape(x, noise_shape)

        # Sample a uniform distribution on [0.0, 1.0) and select values larger
        # than rate.
        #
        # NOTE: Random uniform actually can only generate 2^23 floats on
        # [1.0, 2.0) and subtract 1.0.
        random_tensor = tf.random.stateless_uniform(
            noise_shape, seed=seed, dtype=x.dtype)
        keep_prob = 1 - rate
        scale = 1 / keep_prob
        # NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider
        # that float to be selected, hence we use a >= comparison.
        keep_mask = random_tensor >= rate
        ret = x * scale * tf.cast(keep_mask, x.dtype)
        if not tf.executing_eagerly():
            # Restore the static shape of `x` on the result in graph mode.
            ret.set_shape(x.get_shape())
        return ret