Ejemplo n.º 1
0
def cutout(image, pad_size, replace=0):
    """Apply cutout (https://arxiv.org/abs/1708.04552) to image.

    A centre pixel is drawn uniformly at random over the whole image. The
    square window reaching `pad_size` pixels from that centre in each
    direction (clipped at the image borders, so at most
    2*pad_size x 2*pad_size) is overwritten with `replace` in every channel.

    Args:
      image: A rank-3 image Tensor laid out as [height, width, channels]
        (originally documented as uint8). Any number of channels is
        accepted.
      pad_size: Half the side length of the window that is cut out.
      replace: What pixel value to fill in the image in the area that has
        the cutout mask applied to it.
    Returns:
      An image Tensor with the same shape and dtype as `image`.
    """
    image_shape = tf.shape(image)
    image_height = image_shape[0]
    image_width = image_shape[1]
    # Read the channel count from the image rather than assuming RGB.
    num_channels = image_shape[2]

    # Sample the center location in the image where the mask will be applied.
    cutout_center_height = tf.random_uniform(shape=[],
                                             minval=0,
                                             maxval=image_height,
                                             dtype=tf.int32)

    cutout_center_width = tf.random_uniform(shape=[],
                                            minval=0,
                                            maxval=image_width,
                                            dtype=tf.int32)

    # Number of untouched rows/columns on each side of the window; the
    # tf.maximum calls clip the window at the image borders.
    lower_pad = tf.maximum(0, cutout_center_height - pad_size)
    upper_pad = tf.maximum(0, image_height - cutout_center_height - pad_size)
    left_pad = tf.maximum(0, cutout_center_width - pad_size)
    right_pad = tf.maximum(0, image_width - cutout_center_width - pad_size)

    cutout_shape = [
        image_height - (lower_pad + upper_pad),
        image_width - (left_pad + right_pad)
    ]
    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
    # Zeros inside the window, ones everywhere else.
    mask = tf.pad(tf.zeros(cutout_shape, dtype=image.dtype),
                  padding_dims,
                  constant_values=1)
    mask = tf.expand_dims(mask, -1)
    mask = tf.tile(mask, [1, 1, num_channels])
    image = tf.where(tf.equal(mask, 0),
                     tf.ones_like(image, dtype=image.dtype) * replace, image)
    return image
Ejemplo n.º 2
0
def pad_batch(features, batch_multiple):
    """Zero-pad axis 0 of every feature up to a multiple of `batch_multiple`.

    The batch size is read from the first entry of `features`; the same
    amount of trailing padding is then applied to every entry.

    Args:
      features: dict mapping feature names to Tensors.
      batch_multiple: value the padded batch size must be divisible by.

    Returns:
      A new dict with the same keys and the padded Tensors.
    """
    reference = list(features.values())[0]
    batch_size = tf.shape(reference)[0]
    remainder = batch_size % batch_multiple
    # 1 when the batch is not already aligned, 0 otherwise.
    needs_padding = tf.cast(tf.cast(remainder, tf.bool), tf.int32)
    extra_rows = batch_multiple * needs_padding - remainder

    def _pad_rows(tensor):
        # Only the trailing side of the leading axis receives padding.
        spec = [[0, 0] for _ in range(len(tensor.shape))]
        spec[0][1] = extra_rows
        return tf.pad(tensor, spec)

    return {key: _pad_rows(value) for key, value in features.items()}
Ejemplo n.º 3
0
    def output_layer(self, bottom, in_channels, out_channels, name, var_list):
        """Reflect-pad `bottom` and apply a 9x9 stride-1 convolution plus bias.

        Axes 1 and 2 of `bottom` are padded by half the kernel size on each
        side before a 'VALID' convolution. The filter and bias returned by
        `self.get_conv_var` are appended to `var_list`.

        Returns:
          A tuple (biased convolution output, var_list).
        """
        kernel_size = 9
        border = int(kernel_size / 2)
        with tf.variable_scope(name):
            weights, biases = self.get_conv_var(kernel_size, in_channels,
                                                out_channels, name)
            spatial_pad = [border, border]
            padded = tf.pad(bottom,
                            [[0, 0], spatial_pad, spatial_pad, [0, 0]],
                            mode='REFLECT')
            convolved = tf.nn.conv2d(padded, weights, [1, 1, 1, 1],
                                     padding='VALID')
            output = tf.nn.bias_add(convolved, biases)

            for variable in (weights, biases):
                var_list.append(variable)
            return output, var_list
Ejemplo n.º 4
0
	def conv2d(x, filters, kernel_size, mask_format, use_bias=False, scope='conv2d'):
		"""Zero-pad `x` by the per-side kernel extents, then run a 'VALID' convolution.

		`kernel_size` unpacks as (left, top, right, bottom); those values are
		used as the padding on axes 2 and 1 of `x`. The weight comes from
		`Ops.make_weight2d`; a bias variable is added when `use_bias` is set.
		"""
		with tf.variable_scope(scope):
			left, top, right, bottom = kernel_size
			in_channels = x.get_shape()[-1].value
			weight = Ops.make_weight2d(in_channels, filters, kernel_size, mask_format)
			# Pad with zeros: nothing is known about what lies outside the input.
			border = [[0,0],[top, bottom],[left, right],[0,0]]
			padded = tf.pad(x, border, mode='CONSTANT', constant_values=0)
			out = tf.nn.conv2d(padded, filters=weight, strides=(1,1), padding='VALID')

			if not use_bias:
				return out
			bias = tf.get_variable('bias', shape=[filters], dtype=tf.float32, initializer=Ops.bias_initializer)
			return out + bias
Ejemplo n.º 5
0
def _call_sampler(sample_n_fn, sample_shape, name=None):
  """Draws `prod(sample_shape)` samples and reshapes the leading axis.

  `sample_n_fn` is called with the total number of samples; its result is
  reshaped so that the first axis is replaced by `sample_shape`.
  """
  with tf.name_scope(name, "call_sampler", values=[sample_shape]):
    requested_shape = tf.convert_to_tensor(
        sample_shape, dtype=tf.int32, name="sample_shape")
    # A scalar sample_shape is promoted to a length-1 vector: prepend a 1 to
    # its (empty) shape only when its rank is 0.
    is_scalar = tf.cast(tf.equal(tf.rank(requested_shape), 0), tf.int32)
    vector_shape = tf.pad(tf.shape(requested_shape),
                          paddings=[[is_scalar, 0]],
                          constant_values=1)
    requested_shape = tf.reshape(requested_shape, vector_shape)

    num_samples = tf.reduce_prod(requested_shape)
    flat_samples = sample_n_fn(num_samples)
    trailing_shape = tf.shape(flat_samples)[1:]
    target_shape = tf.concat([requested_shape, trailing_shape], 0)
    return tf.reshape(flat_samples, target_shape)
Ejemplo n.º 6
0
def mask_joint_logits(input_mask, start_end_logits):
    """Masks logits based on input mask and valid start/end combinations.

    One mask slice is built per position along the last axis of
    `start_end_logits`. Each slice is the previous one multiplied by itself
    shifted left by one step (zero-filled on the right). Entries where the
    stacked mask is 0 have 1e6 subtracted from their logit.
    """
    _batch, _starts, length = modeling.get_shape_list(start_end_logits,
                                                      expected_rank=3)

    span_masks = tf.TensorArray(input_mask.dtype,
                                size=length,
                                dynamic_size=False)
    for offset in range(length):
        span_masks = span_masks.write(offset, input_mask)
        # Shift the mask one step towards the start, zero-filling the end,
        # and intersect it with the current mask.
        shifted = tf.pad(input_mask[:, 1:], [[0, 0], [0, 1]])
        input_mask = input_mask * shifted

    stacked = tf.transpose(span_masks.stack(), [1, 2, 0])
    stacked.shape.assert_is_compatible_with(start_end_logits.shape)

    penalty = 1e6 * tf.cast(1 - stacked, tf.float32)
    start_end_logits -= penalty
    return start_end_logits
def upscale2d_conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False):
    """Fused 2x upscale + convolution via a stride-2 transposed convolution.

    The weight from `get_weight` is zero-padded by one on both spatial axes
    and the four one-step-shifted views are summed before being passed to
    `tf.nn.conv2d_transpose` in NCHW layout.
    """
    assert kernel >= 1 and kernel % 2 == 1
    in_fmaps = x.shape[1].value
    w = get_weight([kernel, kernel, fmaps, in_fmaps],
                   gain=gain,
                   use_wscale=use_wscale,
                   fan_in=(kernel**2) * in_fmaps)
    padded = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
    shifted_views = [
        padded[1:, 1:], padded[:-1, 1:], padded[1:, :-1], padded[:-1, :-1]
    ]
    w = tf.cast(tf.add_n(shifted_views), x.dtype)
    batch = tf.shape(x)[0]
    output_shape = [batch, fmaps, x.shape[2] * 2, x.shape[3] * 2]
    return tf.nn.conv2d_transpose(x,
                                  w,
                                  output_shape,
                                  strides=[1, 1, 2, 2],
                                  padding='SAME',
                                  data_format='NCHW')
Ejemplo n.º 8
0
 def residual(self, x, shortcut, out_filters, stride, type='B'):
     """Add `shortcut` to `x` and apply ReLU, adapting the shortcut if needed.

     The shortcut is adapted when its size on axis 3 differs from that of
     `x`, or always when `type` is 'C'. With type 'A' it is strided-sliced
     and zero-padded on axis 3; otherwise it goes through `self.conv`
     (kernel 1, given stride) followed by `self.norm`.
     """
     shortcut_shape = shortcut.get_shape()
     channel_gap = int(x.get_shape()[3] - shortcut_shape[3])
     needs_adapting = type == 'C' or channel_gap != 0

     if needs_adapting and type == 'A':
         shortcut = tf.strided_slice(shortcut, [0, 0, 0, 0],
                                     shortcut_shape,
                                     strides=[1, stride, stride, 1])
         shortcut = tf.pad(
             shortcut,
             paddings=[[0, 0], [0, 0], [0, 0], [0, channel_gap]])
     elif needs_adapting:
         shortcut = self.conv(shortcut, 1, stride, out_filters)
         shortcut = self.norm(shortcut)

     return self.relu(shortcut + x)
Ejemplo n.º 9
0
    def symbols_to_logits_fn(ids):
        """Go from ids to logits for the current output position."""
        # Ids start with an added all-zeros entry: drop it and pad the end so
        # the length on axis 1 is unchanged.
        ids = tf.expand_dims(ids, axis=2)
        without_first = ids[:, 1:]
        latents_discrete = tf.pad(without_first, [[0, 0], [0, 1], [0, 0]])
        num_codes = 2**hparams.bottleneck_bits

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            one_hot = tf.one_hot(latents_discrete, depth=num_codes)
            latents_dense = embed(one_hot)
            latents_pred = decode_transformer(inputs, ed, latents_dense,
                                              hparams, "extra")
            logits = tf.layers.dense(latents_pred,
                                     num_codes,
                                     name="extra_logits")
            last_position = common_layers.shape_list(ids)[1] - 1
            logits = logits[:, last_position, :, :]
        return tf.squeeze(logits, axis=[1])
Ejemplo n.º 10
0
def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """Convolution followed by 2x downscaling, optionally fused into one op.

    With `fused_scale='auto'` the fused path is chosen when the smallest
    entry of `x.shape[2:]` is at least 128. The fused path averages four
    one-step-shifted copies of the zero-padded weight and runs a single
    stride-2 NCHW convolution.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) >= 128

    if fused_scale:
        # Fused: perform both ops simultaneously using tf.nn.conv2d().
        w = get_weight([kernel, kernel, x.shape[1], fmaps], **kwargs)
        padded = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
        shifted_views = [
            padded[1:, 1:], padded[:-1, 1:], padded[1:, :-1], padded[:-1, :-1]
        ]
        w = tf.cast(tf.add_n(shifted_views) * 0.25, x.dtype)
        return tf.nn.conv2d(x,
                            w,
                            strides=[1, 1, 2, 2],
                            padding='SAME',
                            data_format='NCHW')

    # Not fused: call the individual ops directly.
    return downscale2d(conv2d(x, fmaps, kernel, **kwargs))
Ejemplo n.º 11
0
    def test_readme_example(self):
        """Range-encode random symbols and check they decode back unchanged."""
        symbols = tf.random.uniform((128, 128), 0, 10, dtype=tf.int32)

        # Build the CDF from the symbol histogram, with a leading zero.
        counts = tf.bincount(symbols, minlength=10, maxlength=10)
        cdf = tf.pad(tf.cumsum(counts, exclusive=False), [[1, 0]])
        cdf = tf.reshape(cdf, [1, 1, -1])

        symbols = tf.cast(symbols, tf.int16)
        encoded = range_coding_ops.range_encode(symbols, cdf, precision=14)
        decoded = range_coding_ops.range_decode(encoded,
                                                tf.shape(symbols),
                                                cdf,
                                                precision=14)

        with self.cached_session() as sess:
            original, roundtrip = sess.run((symbols, decoded))
            self.assertAllEqual(original, roundtrip)
Ejemplo n.º 12
0
def add_timing_signal_1d_given_position(x,
                                        position,
                                        min_timescale=1.0,
                                        max_timescale=1.0e4):
    """Add sinusoids of geometrically spaced timescales, evaluated at `position`, to `x`.

    `channels // 2` timescales between `min_timescale` and `max_timescale`
    are used. The sine and cosine of `position / timescale` are concatenated
    on axis 2, zero-padded by one channel when `channels` is odd, and added
    to `x`.

    Args:
      x: Tensor whose axis 2 is the channel axis (presumably
        [batch, length, channels]; confirm against callers).
      position: Tensor of positions; it is expanded on axis 2 before being
        multiplied with the timescales (presumably [batch, length]).
      min_timescale: smallest timescale, a float.
      max_timescale: largest timescale, a float.

    Returns:
      `x` plus the timing signal.
    """
    channels = tf.shape(x)[2]
    num_timescales = channels // 2
    # Clamp the denominator to 1. With a single timescale (channels 2 or 3)
    # it would be 0, giving an infinite increment and a NaN signal
    # (0 * -inf). Other channel counts are unaffected by the clamp.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.to_float(num_timescales) - 1, 1))
    inv_timescales = min_timescale * tf.exp(
        tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
    scaled_time = (tf.expand_dims(tf.to_float(position), 2) *
                   tf.expand_dims(tf.expand_dims(inv_timescales, 0), 0))
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=2)
    # An odd channel count leaves one channel without a sin/cos pair.
    signal = tf.pad(signal, [[0, 0], [0, 0], [0, tf.mod(channels, 2)]])
    return x + signal
Ejemplo n.º 13
0
 def _random_segmentation(num_items, num_segments):
     """Partition a sequence of items randomly into non-empty segments.

     Args:
       num_items: an integer scalar > 0
       num_segments: an integer scalar in [1, num_items]
     Returns:
       a Tensor with shape [num_segments] containing positive integers that
       add up to num_items
     """
     # num_segments - 1 ones among num_items - 1 slots, in shuffled order; a
     # one marks an item that starts a new segment.
     boundary_flags = to_int(tf.range(num_items - 1) < num_segments - 1)
     shuffled_flags = tf.random.shuffle(boundary_flags, seed=123)
     # The first item never starts a new segment, hence the leading zero.
     first_in_segment = tf.pad(shuffled_flags, [[1, 0]])
     segment_id = tf.cumsum(first_in_segment)
     return tf.segment_sum(tf.ones_like(segment_id), segment_id)
def categorical_case(pmf, fns, rand=None):
  """Returns the outputs of fns[i] with probability pmf[i].

  Args:
    pmf: A 1-D tensor of probabilities, the probability mass function.
    fns: A list of callables that return tensors, same length as pmf.
    rand: An optional scalar between 0.0 and 1.0, the output of an RNG.
      A fresh uniform sample is drawn when omitted.

  Returns:
    A tensor, the output of fns[i] with probability pmf[i].
  """
  if rand is None:
    rand = tf.random_uniform([])
  # Cumulative mass with a leading zero: edges[i]..edges[i+1] is bin i.
  cumulative = tf.pad(tf.cumsum(pmf), [(1, 0)])
  edges = [cumulative[i] for i in range(len(fns) + 1)]

  pred_fn_pairs = []
  for index, fn in enumerate(fns):
    in_bin = (rand >= edges[index]) & (rand < edges[index + 1])
    pred_fn_pairs.append((in_bin, fn))
  return tf.case(pred_fn_pairs, exclusive=True)
Ejemplo n.º 15
0
def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  For stride 1 this is a plain `slim.conv2d` with 'SAME' padding. For larger
  strides the input is explicitly zero-padded according to the effective
  (atrous) kernel size and convolved with 'VALID' padding, so that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')

  is different when the input's height or width is even. For more details,
  see ResnetUtilsTest.testConv2DSameEven().

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An int with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
                       padding='SAME', scope=scope)

  # Atrous convolution spreads the taps apart, enlarging the footprint.
  effective_kernel = kernel_size + (kernel_size - 1) * (rate - 1)
  total_pad = effective_kernel - 1
  pad_beg = total_pad // 2
  pad_end = total_pad - pad_beg
  spatial_pad = [pad_beg, pad_end]
  padded = tf.pad(
      tensor=inputs,
      paddings=[[0, 0], spatial_pad, spatial_pad, [0, 0]])
  return slim.conv2d(padded, num_outputs, kernel_size, stride=stride,
                     rate=rate, padding='VALID', scope=scope)
    def body(activations_ta, current_activations, current_states, diagonal):
        """Process one diagonal (0, 1, 2, ...) and advance to the next.

        Presumably the body of a `tf.while_loop` over diagonals; confirm
        against the caller. Uses `height`, `width`, `units`, `directions`,
        `cell`, `input_data_transposed` and the helper functions from the
        enclosing scope.

        Args:
          activations_ta: TensorArray that receives the activations of each
            processed diagonal via `scatter`.
          current_activations: activations produced for the previous diagonal.
          current_states: cell states produced for the previous diagonal.
          diagonal: index of the diagonal to process.

        Returns:
          The updated (activations_ta, current_activations, current_states,
          diagonal + 1).
        """

        # Get the diagonal values of the input
        # b x d x inp_size x direction
        input_diagonal = get_diagonal_values(diagonal, input_data_transposed)

        # Activations and cell states need padding except in the first
        # iteration. While diagonal < height the activations are zero-padded by
        # one entry on each side of axis 1; afterwards they are left as is.
        not_first_acti = tf.cond(
            diagonal < height, lambda: tf.pad(
                current_activations, [[0, 0], [1, 1], [0, 0], [0, 0]]),
            lambda: current_activations)

        # Diagonal 0 keeps the incoming activations untouched.
        current_activations = tf.cond(tf.equal(diagonal,
                                               0), lambda: current_activations,
                                      lambda: not_first_acti)

        # Same scheme for the states, padded by pad_with_initial instead.
        not_first_cell = tf.cond(diagonal < height,
                                 lambda: pad_with_initial(current_states),
                                 lambda: current_states)

        current_states = tf.cond(tf.equal(diagonal, 0), lambda: current_states,
                                 lambda: not_first_cell)

        # work out new activations
        current_activations, current_states = cell(input_diagonal,
                                                   current_activations,
                                                   current_states)

        # batch x diagonal x unit x direction
        # Restore the static shape information for the last two axes.
        current_states.set_shape([None, None, units, directions])
        current_activations.set_shape([None, None, units, directions])

        # get indices to place into activations
        indices = get_single_diagonal_indices(height, width, diagonal)

        # We transpose so that the correct values from the current activations
        # go in the correct place: scatter works by using the first index,
        # thus each entry of activations_ta contains
        # batch x units x direction
        activations_ta = activations_ta.scatter(
            indices, tf.transpose(current_activations, (1, 0, 2, 3)))

        diagonal += 1
        return activations_ta, current_activations, current_states, diagonal
Ejemplo n.º 17
0
def bicubic_downsample(x, factor, B=1/3., C=1/3.):
    """Downsample x by `factor` using the filter built by build_filter().

    x: a rank 4 tensor with format NHWC
    factor: downsampling factor (ex: factor=2 means the output size is
        (h/2, w/2))
    B, C: forwarded unchanged to build_filter().
    """
    # Padding amounts follow the 'SAME' convolution formula from
    # https://www.tensorflow.org/api_guides/python/nn#Convolution
    kernel_size = factor * 4
    total_pad = kernel_size - factor
    pad_before = total_pad // 2
    pad_after = total_pad - pad_before
    spatial_pad = [pad_before, pad_after]

    # Mirror padding on both spatial axes.
    mirrored = tf.pad(x, [[0, 0], spatial_pad, spatial_pad, [0, 0]],
                      mode='REFLECT')
    # The downsampling itself is a strided convolution.
    kernel = build_filter(factor, B, C)
    return tf.nn.conv2d(mirrored, kernel, [1, factor, factor, 1], 'VALID',
                        data_format='NHWC')
Ejemplo n.º 18
0
def pad_to_multiple(tensor: tf.Tensor,
                    factor: Union[int, tf.Tensor],
                    axis: int,
                    mode: Optional[Text] = 'CONSTANT',
                    constant_values=0,
                    name: Optional[Text] = None) -> tf.Tensor:
  """Pads `tensor` on a given `axis` to be a multiple of `factor`.

  Padding is appended at the end of the axis only, never at the beginning.
  When the length along `axis` already is a multiple of `factor` the padding
  amount is zero, so the call is effectively a no-op.

  Args:
    tensor: A Tensor with rank >= 1 to pad. Its static rank must be known.
    factor: Positive integer factor to pad for. If a Tensor, must be a scalar
      int.
    axis: A valid axis in `tensor` to pad; negative values count from the end.
    mode: The padding mode to use according to `tf.pad`. Defaults to 'CONSTANT'.
    constant_values: For 'CONSTANT' mode, the scalar pad value to use within
      `tf.pad`. Defaults to 0. Must be same type as `tensor`.
    name: A name for the operation (optional).

  Returns:
    The padded Tensor result.

  Raises:
    ValueError: if `factor` is a non-positive int, the static rank is unknown,
      or `axis` is out of bounds.
  """
  with tf.name_scope(name or 'pad_to_multiple'):
    tensor = tf.convert_to_tensor(tensor)

    if isinstance(factor, int) and factor < 1:
      raise ValueError('`factor` must be positive.')
    rank = tensor.shape.rank
    if rank is None:
      raise ValueError('Static rank of `tensor` must be known.')
    normalized_axis = axis + rank if axis < 0 else axis
    if not 0 <= normalized_axis < rank:
      raise ValueError('`axis` out of bounds for `tensor` rank.')

    current_len = get_shape_list(tensor)[normalized_axis]
    missing = -current_len % factor
    # Column 0 (index -1 matches nothing) is the all-zero "before" padding;
    # column 1 is one-hot at the chosen axis and carries the "after" padding.
    selector = tf.one_hot([-1, normalized_axis], rank, axis=0, dtype=tf.int32)
    return tf.pad(
        tensor=tensor,
        paddings=missing * selector,
        mode=mode,
        constant_values=constant_values)
Ejemplo n.º 19
0
def get_timing_signal_1d(length,
                         channels,
                         min_timescale=1.0,
                         max_timescale=1.0e4):
    """Build a [1, length, channels] tensor of sinusoids of different frequencies.

    `channels // 2` timescales are spaced geometrically between
    `min_timescale` and `max_timescale`. For each position in
    `range(length)` the sine and cosine of `position / timescale` are
    concatenated on the channel axis, which is zero-padded by one when
    `channels` is odd.

    Args:
      length: number of positions.
      channels: size of the channel axis of the result.
      min_timescale: smallest timescale, a float.
      max_timescale: largest timescale, a float.

    Returns:
      A Tensor of shape [1, length, channels].
    """
    position = tf.to_float(tf.range(length))
    num_timescales = channels // 2
    # Clamp the denominator to 1. With a single timescale (channels 2 or 3)
    # it would be 0, giving an infinite increment and a NaN signal
    # (0 * -inf). Other channel counts are unaffected by the clamp.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.to_float(num_timescales) - 1, 1))
    inv_timescales = min_timescale * tf.exp(
        tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
        inv_timescales, 0)
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    # An odd channel count leaves one channel without a sin/cos pair.
    signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]])
    signal = tf.reshape(signal, [1, length, channels])
    return signal
Ejemplo n.º 20
0
    def bucket_fn(x):
        """Compute the element bucket and update the histogram."""
        length = len_fn(x)
        if length.dtype != tf.int64:
            if length.dtype != tf.int32:
                raise ValueError("Len function returned a non-int")
            length = tf.to_int64(length)

        # One flag per histogram bound that lies above this length...
        above_length = tf.to_int64(tf.greater(hist_bounds, length))
        # ...plus a trailing 1 for the "larger than all" bin.
        adds_to_bins = tf.pad(above_length, [[0, 1]], constant_values=1)
        new_counts = tf.assign_add(hist_counts, adds_to_bins)
        bin_ix = n_hist_binds - tf.reduce_sum(adds_to_bins)
        # The bucket is the quantile of the example's bin, taken from the
        # updated counts and scaled to the number of buckets.
        scaled_count = (n_buckets - 1) * new_counts[bin_ix]
        return tf.floordiv(scaled_count, new_counts[-1])
Ejemplo n.º 21
0
def _batch_stitch(features, mean_length=4.0, stddev=2.0):
  """Stitches a batch of single-step data to a batch of multi-step data.

  Every feature is padded along axis 0, shuffled with one shared
  permutation and reshaped to [num_sequences, max_length, ...]. Sequences
  that contain only padding are dropped, the remaining steps of each
  sequence are moved to the front, and the result is passed to `_stitch`.

  Args:
    features: dict of Tensors sharing the same leading (batch) axis; must
      contain a 'task' entry. The dict is modified in place.
    mean_length: mean of the truncated normal used to draw sequence lengths.
    stddev: standard deviation of that truncated normal.

  Returns:
    The result of `_stitch` applied to the regrouped features.
  """
  batch_size = common_layers.shape_list(features['task'])[0]
  num_sequences = tf.maximum(
      tf.to_int32(tf.to_float(batch_size) / mean_length), 1)
  lengths = tf.random.truncated_normal(shape=[num_sequences],
                                       mean=mean_length, stddev=stddev)
  # Rescale the longest sampled length so the lengths sum to batch_size.
  max_length = tf.reduce_max(lengths) * (
      tf.to_float(batch_size) / tf.reduce_sum(lengths))
  max_length = tf.to_int32(tf.ceil(max_length))
  total_items = max_length * num_sequences
  num_paddings = total_items - batch_size
  indices = tf.random.shuffle(tf.range(total_items))
  for key in features:
    shape_list = common_layers.shape_list(features[key])
    assert len(shape_list) >= 1
    paddings = [[0, num_paddings]] + [[0, 0]] * (len(shape_list) - 1)
    # The pad op must be created inside the control-dependency scope;
    # otherwise the assertion is attached to nothing and never runs in
    # graph mode.
    with tf.control_dependencies([
        tf.assert_greater_equal(num_paddings, 0,
                                name='num_paddings_positive')]):
      features[key] = tf.pad(features[key], paddings,
                             constant_values=-1 if key == 'obj_type' else 0)
    features[key] = tf.gather(features[key], indices)
    shape = [num_sequences, max_length]
    if len(shape_list) >= 2:
      shape += shape_list[1:]
    features[key] = tf.reshape(features[key], shape)
  # Remove all-padding seqs
  step_mask = tf.reduce_any(tf.greater(features['task'], 1), axis=-1)
  mask = tf.reduce_any(step_mask, axis=-1)
  step_mask = tf.boolean_mask(step_mask, mask)
  for key in features:
    features[key] = tf.boolean_mask(features[key], mask=mask)
  num_sequences = tf.shape(features['task'])[0]
  # Sort steps within each seq: top_k over the 0/1 step mask puts the
  # non-padding steps first.
  _, step_indices = tf.math.top_k(tf.to_int32(step_mask), k=max_length)
  # Turn per-sequence step indices into indices into the flattened batch.
  step_indices = step_indices + tf.expand_dims(
      tf.range(num_sequences) * max_length, 1)
  step_indices = tf.reshape(step_indices, [-1])
  for key in features:
    shape_list = common_layers.shape_list(features[key])
    features[key] = tf.gather(tf.reshape(features[key], [-1] + shape_list[2:]),
                              step_indices)
    features[key] = tf.reshape(features[key], shape_list)
  features = _stitch(features)
  return features
Ejemplo n.º 22
0
  def pad(tensor, pad_len):
    """Zero-pad `tensor` at the end of axis 1 so its length becomes `pad_len`.

    Args:
      tensor: input tensor of rank >= 2, laid out as [batch, length, ...]
      pad_len: target length of axis 1

    Returns:
      tf.Tensor: Padded input tensor.
    """
    rank = len(tensor.shape)
    assert rank >= 2  # tensor of shape [batch, length, ...]
    current_len = tf.shape(tensor)[1]

    # Only axis 1 grows; every other axis is left untouched.
    untouched_trailing = [[0, 0]] * (rank - 2)
    spec = [[0, 0], [0, pad_len - current_len]] + untouched_trailing
    return tf.pad(tensor, spec)
Ejemplo n.º 23
0
 def waves_to_stfts(self, waves):
     """Convert from waves to complex stfts.

     Inputs:
         - Tensor waves: Tensor of the waveform, shape [batch, time, 1].
     Outputs:
         - Tensor stfts: Complex64 tensor of stft, shape [batch, time, freq, 1].
     """
     # Pad the time axis with the configured left/right amounts.
     time_padding = [self._pad_l, self._pad_r]
     padded = tf.pad(waves, [[0, 0], time_padding, [0, 0]])
     mono = padded[:, :, 0]
     stfts = tf.signal.stft(mono,
                            frame_length=self._nfft,
                            frame_step=self._nhop,
                            fft_length=self._nfft,
                            window_fn=tf.signal.hann_window,
                            pad_end=False)
     stfts = stfts[:, :, :, tf.newaxis]
     # Drop one frequency bin: the first when discarding DC, else the last.
     if self._discard_dc:
         return stfts[:, :, 1:]
     return stfts[:, :, :-1]
Ejemplo n.º 24
0
def _interactive_input_tensor_to_features_dict(feature_map, hparams):
    """Convert the interactive input format to a features dictionary.

    Inputs of rank >= 3 are treated as an image and resized to 299x299.
    Otherwise the input is a 1-D tensor whose first three entries are read
    as (num_samples, decode_length, input_length), followed by the input
    ids themselves.

    Args:
      feature_map: dict with inputs.
      hparams: model hyperparameters

    Returns:
      a features dictionary, as expected by the decoder.
    """
    inputs = tf.convert_to_tensor(feature_map["inputs"])
    input_is_image = len(inputs.get_shape()) >= 3

    if input_is_image:
        x = tf.image.resize_images(inputs, [299, 299])
        x = tf.reshape(x, [1, 299, 299, -1])
        x = tf.to_int32(x)
    else:
        # Strip the three header entries and keep `length` ids.
        num_samples, length = inputs[0], inputs[2]
        x = tf.slice(inputs, [3], tf.to_int32([length]))
        x = tf.reshape(x, [1, -1, 1, 1])
        # Transform into a batch of size num_samples to get that many random
        # decodes.
        x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1]))

    p_hparams = hparams.problem_hparams
    features = {
        "input_space_id": tf.constant(p_hparams.input_space_id),
        "target_space_id": tf.constant(p_hparams.target_space_id),
        "decode_length": (IMAGE_DECODE_LENGTH
                          if input_is_image else inputs[1]),
        "inputs": x,
    }
    # Save inputs to "partial_targets" when prepending inputs to targets. Also
    # keep "inputs" as some models crash if they don't exist.
    if getattr(hparams, "prepend_mode", "none") != "none":
        x_shape = tf.shape(x)
        partial_targets = tf.reshape(x, [x_shape[0], x_shape[1]])
        features["partial_targets"] = tf.pad(partial_targets,
                                             [[0, 0], [0, 1]])
    return features
def create_test_network(image_resolution, convert_variables_to_constants):
    """Convolutional neural network for test.

    Builds a small two-branch network in a fresh graph and returns its
    GraphDef, optionally with the variables frozen into constants.

    Args:
      image_resolution: Resolution to use for input placeholder. Used for
        height and width dimensions.
      convert_variables_to_constants: Whether to convert variables to
        constants.

    Returns:
      graph_def: GraphDef proto of the model.
    """
    g = tf.Graph()
    with g.as_default():
        # An input test image whose height and width are both
        # `image_resolution`.
        x = tf.placeholder(tf.float32,
                           (1, image_resolution, image_resolution, 1),
                           name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = tf.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad,
                         1, [3, 3],
                         stride=2,
                         scope='L2',
                         padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = tf.nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        tf.add(l5, l6, name='L7_add')

        if convert_variables_to_constants:
            # A session is only needed to freeze the variables; the `with`
            # block closes it so its resources are not leaked.
            with tf.Session(graph=g) as sess:
                sess.run(tf.global_variables_initializer())
                graph_def = tf.graph_util.convert_variables_to_constants(
                    sess, g.as_graph_def(), ['L7_add'])
        else:
            graph_def = g.as_graph_def()

    return graph_def
Ejemplo n.º 26
0
def transform_space_to_depth_kernel(kernel, dtype, block_size=2):
    """Transforms the convolution kernel for space-to-depth computation.

    Example: for a [7, 7, 3, 64] kernel (conv0 in ResNet) and block_size 2,
    the kernel is first zero-padded at the top and left to [8, 8, 3, 64],
    then rearranged to [4, 4, 12, 64] and cast to `dtype`.

    Args:
      kernel: A tensor with a shape of [height, width, in_depth, out_depth].
      dtype: The type of the input of the convolution kernel. The kernel will
        be cast to this type.
      block_size: An `int` to indicate the block size in space-to-depth
        transform.

    Returns:
      A transformed kernel that has the same type as `dtype`. The shape is
      [height // block_size, width // block_size, in_depth * (block_size ** 2),
       out_depth], with height and width taken after padding.
    """
    h, w, in_d, out_d = kernel.get_shape().as_list()
    # Amount needed to round each spatial size up to a multiple of block_size.
    pad_h = -h % block_size
    pad_w = -w % block_size
    out_h = (h + pad_h) // block_size
    out_w = (w + pad_w) // block_size

    padded = tf.pad(kernel,
                    paddings=tf.constant([[pad_h, 0], [pad_w, 0], [0, 0],
                                          [0, 0]]),
                    mode='CONSTANT',
                    constant_values=0.)
    # Split each spatial axis into (blocks, offset-within-block), then move
    # both within-block offsets next to the input-depth axis and merge them.
    blocked = tf.reshape(
        padded, [out_h, block_size, out_w, block_size, in_d, out_d])
    blocked = tf.transpose(blocked, [0, 2, 1, 3, 4, 5])
    merged = tf.reshape(blocked,
                        [out_h, out_w, in_d * (block_size**2), out_d])
    return tf.cast(merged, dtype)
Ejemplo n.º 27
0
    def _build(self, vector):
        """Lay out the entries of `vector` as a batch of block matrices.

        Each block row is assembled from consecutive entries of `vector`
        (the content blocks), zero-padded on the left and right so that
        every row spans `self._block_rows` blocks. The rows are then
        concatenated along axis 1.

        Args:
          vector: Tensor compatible with shape (None, self.input_size).

        Returns:
          The concatenation of all block rows along axis 1.
        """
        vector.get_shape().assert_is_compatible_with((None, self.input_size))
        batch = tf.shape(vector)[0]
        block_height, block_width = self._block_shape

        block_rows = []
        offset = 0  # Position of the next unread entry in `vector`.
        for r in xrange(self._block_rows):
            # A block row is: zero blocks, content blocks, zero blocks. Any
            # of the three parts may be empty.
            num_left = self._left_zero_blocks(r)
            num_right = self._right_zero_blocks(r)
            num_content = self._content_blocks(r)

            assert (num_left + num_content + num_right == self._block_rows)

            assert num_left >= 0
            assert num_right >= 0
            assert num_content >= 0

            # Consume the next chunk of entries from the input vector.
            next_offset = offset + num_content * self.block_size
            chunk = vector[:, offset:next_offset]
            offset = next_offset

            content = tf.reshape(chunk,
                                 shape=(batch, block_height,
                                        num_content * block_width),
                                 name='content' + str(r))
            # Surround the content with zeros to form the full block row.
            column_pad = [num_left * block_width, num_right * block_width]
            block_rows.append(
                tf.pad(content, [[0, 0], [0, 0], column_pad],
                       name='block_row' + str(r)))

        return tf.concat(block_rows, 1)
Ejemplo n.º 28
0
 def predict(self, data, labels):
     """Return the contrastive loss of two preprocessed views of `data`.

     `data` is tiled 50 times along axis 0, each element is preprocessed
     twice with the pretraining preprocessing function, and the two views
     are stacked along axis 0 before being fed to the hub module. `labels`
     is not used.
     """
     is_training = False
     tiled = tf.tile(data, [50, 1, 1, 1])
     preprocess_fn_pretrain = get_preprocess_fn(is_training, is_pretrain=True)

     def two_views(image):
         """Produces two transformations of the same image, joined on the last axis."""
         views = [preprocess_fn_pretrain(image) for _ in range(2)]
         return tf.concat(views, -1)

     stacked = tf.map_fn(two_views, tiled)
     # All-zero padding spec: this pad leaves the tensor unchanged.
     zero_spec = [[0, 0]] + [[0, 0]] * (stacked.shape.ndims - 1)
     images = tf.pad(stacked, zero_spec)
     # Separate the two views again and stack them along the batch axis.
     features_list = tf.split(images, num_or_size_splits=2, axis=-1)
     features = tf.concat(features_list, 0)
     outputs = self.hub_module.signatures['default'](features)
     c_loss, _unused_a, _unused_b = add_contrastive_loss(
         hidden=outputs['proj_head_output'], temperature=0.5)
     return c_loss
Ejemplo n.º 29
0
def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """2x upscaling followed by convolution, optionally fused into one op.

    With `fused_scale='auto'` the fused path is chosen when twice the
    smallest entry of `x.shape[2:]` is at least 128. The fused path sums
    four one-step-shifted copies of the zero-padded weight and runs a
    single stride-2 NCHW transposed convolution.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) * 2 >= 128

    if fused_scale:
        # Fused: perform both ops simultaneously using
        # tf.nn.conv2d_transpose().
        w = get_weight([kernel, kernel, x.shape[1], fmaps], **kwargs)
        # Reorder to [kernel, kernel, fmaps_out, fmaps_in].
        w = tf.transpose(w, [0, 1, 3, 2])
        padded = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
        shifted_views = [
            padded[1:, 1:], padded[:-1, 1:], padded[1:, :-1], padded[:-1, :-1]
        ]
        w = tf.cast(tf.add_n(shifted_views), x.dtype)
        output_shape = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2]
        return tf.nn.conv2d_transpose(x,
                                      w,
                                      output_shape,
                                      strides=[1, 1, 2, 2],
                                      padding='SAME',
                                      data_format='NCHW')

    # Not fused: call the individual ops directly.
    return conv2d(upscale2d(x), fmaps, kernel, **kwargs)
Ejemplo n.º 30
0
def StitchImages(images):
    """Tile a batch of images into a single square grid image.

    The batch is zero-padded up to the next perfect square and rearranged
    into one image of shape [1, n * width, n * width, channels], where n is
    the grid side. The size of axis 1 is used for both spatial axes.

    Args:
      images: Tensor laid out as [batch, x, y, c].

    Returns:
      A Tensor of shape [1, n * width, n * width, channels].
    """
    batch, width, _, channels = tf.unstack(tf.shape(images))
    grid_side = tf.to_int32(tf.ceil(tf.sqrt(tf.to_float(batch))))
    canvas_width = grid_side * width

    # Append blank images so the batch fills the whole grid.
    num_blank = grid_side * grid_side - batch
    images = tf.pad(images, [[0, num_blank], [0, 0], [0, 0], [0, 0]])

    images = tf.transpose(images, [1, 0, 2, 3])
    images = tf.reshape(images, [width, grid_side, canvas_width, channels])
    images = tf.transpose(images, [1, 0, 2, 3])
    return tf.reshape(images, [1, canvas_width, canvas_width, channels])