Example #1
	def _enas_layer(self, layer_id, prev_layers, arc, out_filters):
		"""
		Args:
		  layer_id: index of the current layer.
		  prev_layers: cache of the two previous layers, used for skip connections.
		  arc: the sampled architecture; for each cell it stores two input indices
			and two op ids (4 entries per cell).
		  out_filters: number of output filters for this layer.
		"""

		assert len(prev_layers) == 2, "need exactly 2 inputs"
		layers = [prev_layers[0], prev_layers[1]]
		layers = self._maybe_calibrate_size(layers, out_filters,
											is_training=True)  
		used = []
		for cell_id in range(self.num_cells):  
			prev_layers = tf.stack(layers, axis=0)  
			with tf.variable_scope("cell_{0}".format(cell_id)):
				with tf.variable_scope("x"):
					x_id = arc[4 * cell_id]  
					x_op = arc[4 * cell_id + 1] 
					x = prev_layers[x_id, :, :, :, :]
					x = self._enas_cell(x, cell_id, x_id, x_op, out_filters)
					x_used = tf.one_hot(x_id, depth=self.num_cells + 2, dtype=tf.int32)

				with tf.variable_scope("y"):
					y_id = arc[4 * cell_id + 2]
					y_op = arc[4 * cell_id + 3]
					y = prev_layers[y_id, :, :, :, :]
					y = self._enas_cell(y, cell_id, y_id, y_op, out_filters)
					y_used = tf.one_hot(y_id, depth=self.num_cells + 2,
										dtype=tf.int32)  

				out = x + y  
				used.extend([x_used, y_used])
				layers.append(out)  

		
		used = tf.add_n(used)  
		indices = tf.where(tf.equal(used, 0))  
		indices = tf.to_int32(indices)  
		indices = tf.reshape(indices, [-1])
		num_outs = tf.size(indices)  
		out = tf.stack(layers, axis=0)  
		out = tf.gather(out, indices, axis=0)  

		inp = prev_layers[0]  
		if self.data_format == "NHWC":
			N = tf.shape(inp)[0]
			H = tf.shape(inp)[1]
			W = tf.shape(inp)[2]
			C = tf.shape(inp)[3]
			out = tf.transpose(out, [1, 2, 3, 0, 4])
			out = tf.reshape(out, [N, H, W, num_outs * out_filters])
		elif self.data_format == "NCHW":
			N = tf.shape(inp)[0]
			C = tf.shape(inp)[1]
			H = tf.shape(inp)[2]
			W = tf.shape(inp)[3]
			out = tf.transpose(out, [1, 0, 2, 3, 4])  
			out = tf.reshape(out,
							 [N, num_outs * out_filters, H, W])  
		else:
			raise ValueError("Unknown data_format '{0}'".format(self.data_format))

		with tf.variable_scope("final_conv"):
			w = create_weight("w", [self.num_cells + 2, out_filters * out_filters])
			w = tf.gather(w, indices, axis=0)
			w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters])
			out = tf.nn.relu(out)
			out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME",
							   data_format=self.data_format)
			out = batch_norm(out, is_training=True, data_format=self.data_format)

		out = tf.reshape(out, tf.shape(prev_layers[0]))

		return out
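A minimal standalone sketch (not part of the ENAS code above; shapes and indices are made up) of the "loose ends" bookkeeping in _enas_layer: one-hot usage vectors are summed, unused layer indices are found with tf.where, and only those layers are gathered as the block output.

import tensorflow as tf

num_candidates = 4
layers = [tf.fill([2, 8, 8, 16], float(i)) for i in range(num_candidates)]

# Suppose the cells consumed layers 0 and 2 as inputs.
used = tf.add_n([tf.one_hot(i, depth=num_candidates, dtype=tf.int32)
                 for i in (0, 2)])

# Layers that were never used become the output of the block.
unused = tf.reshape(tf.cast(tf.where(tf.equal(used, 0)), tf.int32), [-1])
out = tf.gather(tf.stack(layers, axis=0), unused, axis=0)  # [num_unused, 2, 8, 8, 16]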
Example #2
def piecewise_linear(boundaries, values, name=None):
    """Piecewise linear function assuming given values at given boundaries.

  Args:
    boundaries: A list of `Tensor`s or `int`s or `float`s with strictly
      increasing entries. The first entry must be 0.
    values: A list of `Tensor`s, `float`s or `int`s that specifies the values
      at the `boundaries`. It must have the same number of elements as
      `boundaries`, and all elements should have the same type.
    name: A string. Optional name of the operation. Defaults to
      'PiecewiseLinear'.

  Returns:
    A 0-D Tensor. Its value is `values[0]` if `x < boundaries[0]` and
    `values[-1]` if `x >= boundaries[-1]`. If `boundaries[i] <= x <
    boundaries[i+1]` it is the linear interpolation between `values[i]` and
    `values[i+1]`: `values[i] + (values[i+1]-values[i]) * (x-boundaries[i]) /
    (boundaries[i+1]-boundaries[i])`.

  Raises:
    AssertionError: if values or boundaries is empty, or not the same size.
  """
    global_step = tf.train.get_or_create_global_step()
    with tf.name_scope(name, 'PiecewiseLinear',
                       [global_step, boundaries, values, name]) as name:
        values = tf.convert_to_tensor(values)
        x = tf.cast(tf.convert_to_tensor(global_step), values.dtype)
        boundaries = tf.cast(tf.convert_to_tensor(boundaries), values.dtype)

        num_boundaries = np.prod(boundaries.shape.as_list())
        num_values = np.prod(values.shape.as_list())
        assert num_boundaries > 0, 'Need more than 0 boundaries'
        assert num_values > 0, 'Need more than 0 values'
        assert num_values == num_boundaries, (
            'boundaries and values must be of '
            'same size')

        # Make sure there is an unmet last boundary with the same value as the
        # last one that was passed in, and at least one boundary was met.
        values = tf.concat([values, tf.reshape(values[-1], [1])], 0)
        boundaries = tf.concat(
            [boundaries,
             tf.reshape(tf.maximum(x + 1, boundaries[-1]), [1])], 0)

        # Make sure there is at least one boundary that was already met, with the
        # same value as the first one that was passed in.
        values = tf.concat([tf.reshape(values[0], [1]), values], 0)
        boundaries = tf.concat(
            [tf.reshape(tf.minimum(x - 1, boundaries[0]), [1]), boundaries], 0)

        # Identify index of the last boundary that was passed.
        unreached_boundaries = tf.reshape(tf.where(tf.greater(boundaries, x)),
                                          [-1])
        unreached_boundaries = tf.concat(
            [unreached_boundaries, [tf.cast(tf.size(boundaries), tf.int64)]],
            0)
        index = tf.reshape(tf.reduce_min(unreached_boundaries), [1])

        # Get values at last and next boundaries.
        value_left = tf.reshape(tf.slice(values, index - 1, [1]), [])
        left_boundary = tf.reshape(tf.slice(boundaries, index - 1, [1]), [])
        value_right = tf.reshape(tf.slice(values, index, [1]), [])
        right_boundary = tf.reshape(tf.slice(boundaries, index, [1]), [])

        # Calculate linear interpolation.
        a = (value_right - value_left) / (right_boundary - left_boundary)
        b = value_left - a * left_boundary
        return a * x + b
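A hypothetical usage sketch for piecewise_linear, assuming the TF1-style graph/session workflow the function targets: a warmup schedule that ramps from 0.0 to 0.1 over the first 1000 global steps and stays flat afterwards.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

lr = piecewise_linear(boundaries=[0, 1000], values=[0.0, 0.1])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(lr))  # 0.0 while global_step == 0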
Example #3
  def parse_train_data(self, data):
    """Parse data for ShapeMask training."""
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']
    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
      num_groundtruths = tf.shape(classes)[0]
      with tf.control_dependencies([num_groundtruths, is_crowds]):
        indices = tf.cond(
            tf.greater(tf.size(is_crowds), 0),
            lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
      classes = tf.gather(classes, indices)
      boxes = tf.gather(boxes, indices)
      masks = tf.gather(masks, indices)

    # If not using category, makes all categories with id = 1.
    if not self._use_category:
      classes = tf.cast(tf.greater(classes, 0), dtype=tf.int32)

    image = self.get_normalized_image(data)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
      image, boxes, masks = input_utils.random_horizontal_flip(
          image, boxes, masks)

    # Converts boxes from normalized coordinates to pixel coordinates.
    image_shape = tf.shape(image)[0:2]
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    self._train_image_scale = image_info[2, :]
    self._train_offset = image_info[3, :]

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(boxes, self._train_image_scale,
                                              image_info[1, :],
                                              self._train_offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    masks = tf.gather(masks, indices)

    # Assigns anchors.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)
    (cls_targets,
     box_targets,
     num_positives) = anchor_labeler.label_anchors(
         boxes,
         tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

    # Sample groundtruth masks/boxes/classes for mask branch.
    num_masks = tf.shape(masks)[0]
    mask_shape = tf.shape(masks)[1:3]

    # Randomly shuffle groundtruth masks for mask branch training.
    rand_indices = tf.random.shuffle(tf.range(num_masks))
    shuffled_boxes = tf.gather(boxes, rand_indices)
    shuffled_classes = tf.gather(classes, rand_indices)
    shuffled_masks = tf.gather(masks, rand_indices)

    # Pad sampled boxes/masks/classes to a constant batch size. If the image
    # has more masks than `num_sampled_masks`, the tensor will be clipped.
    padded_boxes = input_utils.clip_or_pad_to_fixed_size(
        shuffled_boxes, self._num_sampled_masks)
    padded_classes = input_utils.clip_or_pad_to_fixed_size(
        shuffled_classes, self._num_sampled_masks)
    padded_masks = input_utils.clip_or_pad_to_fixed_size(
        shuffled_masks, self._num_sampled_masks)

    # Jitter the sampled boxes to mimic the noisy detections.
    padded_boxes = box_utils.jitter_boxes(
        padded_boxes, noise_scale=self._box_jitter_scale)
    padded_boxes = box_utils.clip_boxes(padded_boxes, self._output_size)
    # Compute mask targets in feature crop. A feature crop fully contains a
    # sampled box.
    mask_outer_boxes = box_utils.compute_outer_boxes(
        padded_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
    mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes, self._output_size)
    # Undo the resize offset and scale so mask_outer_boxes maps back to the
    # original image coordinates.
    mask_outer_boxes_ori = mask_outer_boxes
    mask_outer_boxes_ori += tf.tile(
        tf.expand_dims(self._train_offset, axis=0), [1, 2])
    mask_outer_boxes_ori /= tf.tile(
        tf.expand_dims(self._train_image_scale, axis=0), [1, 2])
    norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
        mask_outer_boxes_ori, mask_shape)

    # Set sampled_masks shape to [batch_size, height, width, 1].
    padded_masks = tf.cast(tf.expand_dims(padded_masks, axis=-1), tf.float32)
    mask_targets = tf.image.crop_and_resize(
        padded_masks,
        norm_mask_outer_boxes_ori,
        box_ind=tf.range(self._num_sampled_masks),
        crop_size=[self._mask_crop_size, self._mask_crop_size],
        method='bilinear',
        extrapolation_value=0,
        name='train_mask_targets')
    mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                            tf.ones_like(mask_targets),
                            tf.zeros_like(mask_targets))
    mask_targets = tf.squeeze(mask_targets, axis=-1)
    if self._up_sample_factor > 1:
      fine_mask_targets = tf.image.crop_and_resize(
          padded_masks,
          norm_mask_outer_boxes_ori,
          box_ind=tf.range(self._num_sampled_masks),
          crop_size=[
              self._mask_crop_size * self._up_sample_factor,
              self._mask_crop_size * self._up_sample_factor
          ],
          method='bilinear',
          extrapolation_value=0,
          name='train_mask_targets')
      fine_mask_targets = tf.where(
          tf.greater_equal(fine_mask_targets, 0.5),
          tf.ones_like(fine_mask_targets), tf.zeros_like(fine_mask_targets))
      fine_mask_targets = tf.squeeze(fine_mask_targets, axis=-1)
    else:
      fine_mask_targets = mask_targets

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
      image = tf.cast(image, dtype=tf.bfloat16)

    valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
    if self._mask_train_class == 'all':
      mask_is_valid = valid_image * tf.ones_like(padded_classes, tf.int32)
    else:
      # Get the intersection of sampled classes with training splits.
      mask_valid_classes = tf.cast(
          tf.expand_dims(
              class_utils.coco_split_class_ids(self._mask_train_class), 1),
          padded_classes.dtype)
      match = tf.reduce_any(
          tf.equal(tf.expand_dims(padded_classes, 0), mask_valid_classes), 0)
      mask_is_valid = valid_image * tf.cast(match, tf.int32)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': input_anchor.multilevel_boxes,
        'num_positives': num_positives,
        'image_info': image_info,
        # For ShapeMask.
        'mask_boxes': padded_boxes,
        'mask_outer_boxes': mask_outer_boxes,
        'mask_targets': mask_targets,
        'fine_mask_targets': fine_mask_targets,
        'mask_classes': padded_classes,
        'mask_is_valid': mask_is_valid,
    }
    return image, labels
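A standalone sketch of the crowd-filtering idiom used at the top of parse_train_data: keep only annotations whose is_crowd flag is False, falling back to all indices when the flag tensor is empty. The values here are made up for illustration.

import tensorflow as tf

classes = tf.constant([1, 7, 3, 7])
is_crowds = tf.constant([False, True, False, False])

num_groundtruths = tf.shape(classes)[0]
indices = tf.cond(
    tf.greater(tf.size(is_crowds), 0),
    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
classes = tf.gather(classes, indices)  # -> [1, 3, 7]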
def variable_gradient_stability_estimate(model,
                                         tape,
                                         losses,
                                         batchsize,
                                         nelem_per_piece=8,
                                         aggregate_variable_estimates=True):
    """Estimate the symmetric alpha-stable tail index of gradient noise.

  We construct the estimate based on a model and gradient tape and a vector of
  per-instance losses.  The set of losses is grouped into batches and we
  compute per-batch gradients.  The total gradient is used to center the
  per-batch gradients, resulting in a set of independent gradient noise
  samples.  These zero-mean gradient noise samples form the input to a tail
  index estimator.

  Args:
    model: tf.keras.Model.
    tape: tf.GradientTape(persistent=True) that has been used to compute losses.
    losses: Tensor of shape (n,), one loss element per instance.
    batchsize: int, the number of instances per batch.
    nelem_per_piece: int, number of elements to group per block in the tail
      index estimator.  Ideally this is around sqrt(n//batchsize).
    aggregate_variable_estimates: bool, if True all estimates in a tf.Variable
      are mean-reduced.  If False individual estimates for each parameter are
      computed.

  Returns:
    stability_estimate: list of tf.Tensor objects containing the estimates of
    the tail index (stability == alpha).
  """
    n = int(tf.size(losses))  # number of instances
    with tape:
        loss_total = tf.reduce_mean(losses)
        losses_batched = tf.split(losses, n // batchsize)
        loss_batches = list(map(tf.reduce_mean, losses_batched))

    gradients_total = tape.gradient(loss_total, model.trainable_variables)
    gradients_total = _filter_gradient_tensors(gradients_total)
    gradients_batches = list(
        map(lambda loss_i: tape.gradient(loss_i, model.trainable_variables),
            loss_batches))
    gradients_batches = list(map(_filter_gradient_tensors, gradients_batches))

    gradients_noise = list(
        map(
            lambda gradients_batch_j: list(
                map(  # pylint: disable=g-long-lambda
                    lambda grads: grads[1] - grads[0],
                    zip(gradients_total, gradients_batch_j))),
            gradients_batches))

    noises = list(map(tf.stack, zip(*gradients_noise)))
    sample_axis = 0
    invalphas_estimate = list(
        map(
            lambda noise: symmetric_alpha_stable_invstability_estimator(  # pylint: disable=g-long-lambda
                noise, sample_axis, nelem_per_piece),
            noises))

    if aggregate_variable_estimates:
        stability_estimate = list(
            map(lambda invalpha: 1.0 / tf.reduce_mean(invalpha),
                invalphas_estimate))
    else:
        stability_estimate = list(
            map(lambda invalpha: 1.0 / invalpha, invalphas_estimate))

    return stability_estimate
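A hypothetical call sketch, assuming a small Keras model, per-example losses recorded on a persistent GradientTape (as the docstring requires), and that the module-level helpers used above (_filter_gradient_tensors, symmetric_alpha_stable_invstability_estimator) are available.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(3)])
x = tf.random.normal([32, 5])
y = tf.random.uniform([32], maxval=3, dtype=tf.int32)

with tf.GradientTape(persistent=True) as tape:
    logits = model(x)
    losses = tf.keras.losses.sparse_categorical_crossentropy(
        y, logits, from_logits=True)  # shape (32,), one loss per instance

alphas = variable_gradient_stability_estimate(model, tape, losses, batchsize=8)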
Example #5
  def num_unmatched_columns(self):
    return tf.size(self.unmatched_column_indices())
Example #6
def dropblock(net,
              is_training,
              keep_prob,
              dropblock_size,
              data_format='channels_first'):
    """DropBlock: a regularization method for convolutional neural networks.

  DropBlock is a form of structured dropout, where units in a contiguous
  region of a feature map are dropped together. DropBlock works better than
  dropout on convolutional layers due to the fact that activation units in
  convolutional layers are spatially correlated.
  See https://arxiv.org/pdf/1810.12890.pdf for details.

  Args:
    net: `Tensor` input tensor.
    is_training: `bool` for whether the model is training.
    keep_prob: `float` or `Tensor` keep_prob parameter of DropBlock. "None"
        means no DropBlock.
    dropblock_size: `int` size of blocks to be dropped by DropBlock.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
  Returns:
      A version of input tensor with DropBlock applied.
  Raises:
      ValueError: if width and height of the input tensor are not equal.
  """

    if not is_training or keep_prob is None:
        return net

    tf.logging.info(
        'Applying DropBlock: dropblock_size {}, net.shape {}'.format(
            dropblock_size, net.shape))

    if data_format == 'channels_last':
        _, width, height, _ = net.get_shape().as_list()
    else:
        _, _, width, height = net.get_shape().as_list()
    if width != height:
        raise ValueError('Input tensor with width!=height is not supported.')

    dropblock_size = min(dropblock_size, width)
    # seed_drop_rate is the gamma parameter of DropBlock.
    seed_drop_rate = (1.0 - keep_prob) * width**2 / dropblock_size**2 / (
        width - dropblock_size + 1)**2

    # Forces the block to be inside the feature map.
    w_i, h_i = tf.meshgrid(tf.range(width), tf.range(width))
    valid_block_center = tf.logical_and(
        tf.logical_and(w_i >= int(dropblock_size // 2),
                       w_i < width - (dropblock_size - 1) // 2),
        tf.logical_and(h_i >= int(dropblock_size // 2),
                       h_i < width - (dropblock_size - 1) // 2))

    valid_block_center = tf.expand_dims(valid_block_center, 0)
    valid_block_center = tf.expand_dims(
        valid_block_center, -1 if data_format == 'channels_last' else 0)

    randnoise = tf.random_uniform(net.shape, dtype=tf.float32)
    block_pattern = (
        1 - tf.cast(valid_block_center, dtype=tf.float32) + tf.cast(
            (1 - seed_drop_rate), dtype=tf.float32) + randnoise) >= 1
    block_pattern = tf.cast(block_pattern, dtype=tf.float32)

    if dropblock_size == width:
        block_pattern = tf.reduce_min(
            block_pattern,
            axis=[1, 2] if data_format == 'channels_last' else [2, 3],
            keepdims=True)
    else:
        if data_format == 'channels_last':
            ksize = [1, dropblock_size, dropblock_size, 1]
        else:
            ksize = [1, 1, dropblock_size, dropblock_size]
        block_pattern = -tf.nn.max_pool(
            -block_pattern,
            ksize=ksize,
            strides=[1, 1, 1, 1],
            padding='SAME',
            data_format='NHWC' if data_format == 'channels_last' else 'NCHW')

    percent_ones = tf.cast(tf.reduce_sum(
        (block_pattern)), tf.float32) / tf.cast(tf.size(block_pattern),
                                                tf.float32)

    net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
        block_pattern, net.dtype)
    return net
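A hypothetical usage sketch: apply DropBlock to a square NHWC feature map during training, using the TF1-style namespace (tf.random_uniform, tf.logging) the function above relies on.

import tensorflow.compat.v1 as tf

features = tf.random_uniform([8, 32, 32, 64])  # [batch, height, width, channels]
features = dropblock(
    features,
    is_training=True,
    keep_prob=0.9,
    dropblock_size=7,
    data_format='channels_last')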
def project_distribution(supports,
                         weights,
                         target_support,
                         validate_args=False):
    """Projects a batch of (support, weights) onto target_support.

  Based on equation (7) in (Bellemare et al., 2017):
    https://arxiv.org/abs/1707.06887
  In the rest of the comments we will refer to this equation simply as Eq7.

  This code is not easy to digest, so we will use a running example to clarify
  what is going on, with the following sample inputs:

    * supports =       [[0, 2, 4, 6, 8],
                        [1, 3, 4, 5, 6]]
    * weights =        [[0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.2, 0.5, 0.1, 0.1]]
    * target_support = [4, 5, 6, 7, 8]

  In the code below, comments preceded with 'Ex:' will be referencing the above
  values.

  Args:
    supports: Tensor of shape (batch_size, num_dims) defining supports for the
      distribution.
    weights: Tensor of shape (batch_size, num_dims) defining weights on the
      original support points. Although for the CategoricalDQN agent these
      weights are probabilities, it is not required that they are.
    target_support: Tensor of shape (num_dims) defining support of the projected
      distribution. The values must be monotonically increasing. Vmin and Vmax
      will be inferred from the first and last elements of this tensor,
      respectively. The values in this tensor must be equally spaced.
    validate_args: Whether we will verify the contents of the target_support
      parameter.

  Returns:
    A Tensor of shape (batch_size, num_dims) with the projection of a batch of
    (support, weights) onto target_support.

  Raises:
    ValueError: If target_support has no dimensions, or if shapes of supports,
      weights, and target_support are incompatible.
  """
    target_support_deltas = target_support[1:] - target_support[:-1]
    # delta_z = `\Delta z` in Eq7.
    delta_z = target_support_deltas[0]
    validate_deps = []
    supports.shape.assert_is_compatible_with(weights.shape)
    supports[0].shape.assert_is_compatible_with(target_support.shape)
    target_support.shape.assert_has_rank(1)
    if validate_args:
        # Assert that supports and weights have the same shapes.
        validate_deps.append(
            tf.Assert(
                tf.reduce_all(tf.equal(tf.shape(supports), tf.shape(weights))),
                [supports, weights]))
        # Assert that elements of supports and target_support have the same shape.
        validate_deps.append(
            tf.Assert(
                tf.reduce_all(
                    tf.equal(tf.shape(supports)[1], tf.shape(target_support))),
                [supports, target_support]))
        # Assert that target_support has a single dimension.
        validate_deps.append(
            tf.Assert(tf.equal(tf.size(tf.shape(target_support)), 1),
                      [target_support]))
        # Assert that the target_support is monotonically increasing.
        validate_deps.append(
            tf.Assert(tf.reduce_all(target_support_deltas > 0),
                      [target_support]))
        # Assert that the values in target_support are equally spaced.
        validate_deps.append(
            tf.Assert(tf.reduce_all(tf.equal(target_support_deltas, delta_z)),
                      [target_support]))

    with tf.control_dependencies(validate_deps):
        # Ex: `v_min, v_max = 4, 8`.
        v_min, v_max = target_support[0], target_support[-1]
        # Ex: `batch_size = 2`.
        batch_size = tf.shape(supports)[0]
        # `N` in Eq7.
        # Ex: `num_dims = 5`.
        num_dims = tf.shape(target_support)[0]
        # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
        # Ex: `clipped_support = [[[ 4.  4.  4.  6.  8.]]
        #                         [[ 4.  4.  4.  5.  6.]]]`.
        clipped_support = tf.clip_by_value(supports, v_min, v_max)[:, None, :]
        # Ex: `tiled_support = [[[[ 4.  4.  4.  6.  8.]
        #                         [ 4.  4.  4.  6.  8.]
        #                         [ 4.  4.  4.  6.  8.]
        #                         [ 4.  4.  4.  6.  8.]
        #                         [ 4.  4.  4.  6.  8.]]
        #                        [[ 4.  4.  4.  5.  6.]
        #                         [ 4.  4.  4.  5.  6.]
        #                         [ 4.  4.  4.  5.  6.]
        #                         [ 4.  4.  4.  5.  6.]
        #                         [ 4.  4.  4.  5.  6.]]]]`.
        tiled_support = tf.tile([clipped_support], [1, 1, num_dims, 1])
        # Ex: `reshaped_target_support = [[[ 4.]
        #                                  [ 5.]
        #                                  [ 6.]
        #                                  [ 7.]
        #                                  [ 8.]]
        #                                 [[ 4.]
        #                                  [ 5.]
        #                                  [ 6.]
        #                                  [ 7.]
        #                                  [ 8.]]]`.
        reshaped_target_support = tf.tile(target_support[:, None],
                                          [batch_size, 1])
        reshaped_target_support = tf.reshape(reshaped_target_support,
                                             [batch_size, num_dims, 1])
        # numerator = `|clipped_support - z_i|` in Eq7.
        # Ex: `numerator = [[[[ 0.  0.  0.  2.  4.]
        #                     [ 1.  1.  1.  1.  3.]
        #                     [ 2.  2.  2.  0.  2.]
        #                     [ 3.  3.  3.  1.  1.]
        #                     [ 4.  4.  4.  2.  0.]]
        #                    [[ 0.  0.  0.  1.  2.]
        #                     [ 1.  1.  1.  0.  1.]
        #                     [ 2.  2.  2.  1.  0.]
        #                     [ 3.  3.  3.  2.  1.]
        #                     [ 4.  4.  4.  3.  2.]]]]`.
        numerator = tf.abs(tiled_support - reshaped_target_support)
        quotient = 1 - (numerator / delta_z)
        # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
        # Ex: `clipped_quotient = [[[[ 1.  1.  1.  0.  0.]
        #                            [ 0.  0.  0.  0.  0.]
        #                            [ 0.  0.  0.  1.  0.]
        #                            [ 0.  0.  0.  0.  0.]
        #                            [ 0.  0.  0.  0.  1.]]
        #                           [[ 1.  1.  1.  0.  0.]
        #                            [ 0.  0.  0.  1.  0.]
        #                            [ 0.  0.  0.  0.  1.]
        #                            [ 0.  0.  0.  0.  0.]
        #                            [ 0.  0.  0.  0.  0.]]]]`.
        clipped_quotient = tf.clip_by_value(quotient, 0, 1)
        # Ex: `weights = [[ 0.1  0.6  0.1  0.1  0.1]
        #                 [ 0.1  0.2  0.5  0.1  0.1]]`.
        weights = weights[:, None, :]
        # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))`
        # in Eq7.
        # Ex: `inner_prod = [[[[ 0.1  0.6  0.1  0.  0. ]
        #                      [ 0.   0.   0.   0.  0. ]
        #                      [ 0.   0.   0.   0.1 0. ]
        #                      [ 0.   0.   0.   0.  0. ]
        #                      [ 0.   0.   0.   0.  0.1]]
        #                     [[ 0.1  0.2  0.5  0.  0. ]
        #                      [ 0.   0.   0.   0.1 0. ]
        #                      [ 0.   0.   0.   0.  0.1]
        #                      [ 0.   0.   0.   0.  0. ]
        #                      [ 0.   0.   0.   0.  0. ]]]]`.
        inner_prod = clipped_quotient * weights
        # Ex: `projection = [[ 0.8 0.0 0.1 0.0 0.1]
        #                    [ 0.8 0.1 0.1 0.0 0.0]]`.
        projection = tf.reduce_sum(inner_prod, 3)
        projection = tf.reshape(projection, [batch_size, num_dims])
        return projection
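A worked call using the running example from the docstring above; the expected result is the projection shown in the final 'Ex:' comment.

import tensorflow as tf

supports = tf.constant([[0., 2., 4., 6., 8.],
                        [1., 3., 4., 5., 6.]])
weights = tf.constant([[0.1, 0.6, 0.1, 0.1, 0.1],
                       [0.1, 0.2, 0.5, 0.1, 0.1]])
target_support = tf.constant([4., 5., 6., 7., 8.])

projection = project_distribution(supports, weights, target_support)
# -> approximately [[0.8, 0.0, 0.1, 0.0, 0.1],
#                   [0.8, 0.1, 0.1, 0.0, 0.0]]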
Example #8
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            min_score_thresh=MIN_SCORE_THRESH,
                            max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
                            soft_nms_sigma=0.0,
                            iou_threshold=0.5,
                            use_native_nms=True):
  """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a numpy array with shape [N, 1], which has the highest class
      scores on all feature levels. The N is the number of selected
      top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)
    box_outputs: a numpy array with shape [N, 4], which stacks box regression
      outputs on all feature levels. The N is the number of selected top-k
      total anchors on all levels. (k being MAX_DETECTION_POINTS)
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels. The N is the number of selected top-k total anchors on
      all levels.
    indices: a numpy array with shape [N], which is the indices from top-k
      selection.
    classes: a numpy array with shape [N], which represents the class
      prediction on all selected anchors from top-k selection.
    image_id: an integer number to specify the image id.
    image_scale: a float tensor representing the scale between original image
      and input image for the detector. It is used to rescale detections for
      evaluating with the original groundtruth annotations.
    min_score_thresh: A float representing the threshold for deciding when to
      remove boxes based on score.
    max_boxes_to_draw: Max number of boxes to draw.
    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter;
      See Bodla et al., https://arxiv.org/abs/1704.04503. When
        `soft_nms_sigma=0.0` (the default), we fall back to standard (hard)
        NMS.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    use_native_nms: a bool that indicates whether to use native nms.

  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, y, x, height, width, score, class]
  """
  logging.info('Using tf version of post-processing.')
  anchor_boxes = tf.gather(anchor_boxes, indices)

  scores = tf.math.sigmoid(cls_outputs)
  # apply bounding box regression to anchors
  boxes = decode_box_outputs_tf(
      tf.transpose(box_outputs, [1, 0]), tf.transpose(anchor_boxes, [1, 0]))

  if use_native_nms:
    logging.info('Using native nms.')
    top_detection_idx, scores = tf.image.non_max_suppression_with_scores(
        boxes,
        scores,
        max_boxes_to_draw,
        iou_threshold=iou_threshold,
        score_threshold=min_score_thresh,
        soft_nms_sigma=soft_nms_sigma)
    boxes = tf.gather(boxes, top_detection_idx)
  else:
    logging.info('Using customized nms.')
    scores = tf.expand_dims(scores, axis=1)
    all_detections = tf.concat([boxes, scores], axis=1)
    top_detection_idx = nms_tf(all_detections, iou_threshold)
    detections = tf.gather(all_detections, top_detection_idx)
    scores = detections[:, 4]
    boxes = detections[:, :4]
  height = boxes[:, 2] - boxes[:, 0]
  width = boxes[:, 3] - boxes[:, 1]

  detections = tf.stack([
      tf.cast(tf.tile(image_id, [tf.size(top_detection_idx)]), tf.float32),
      boxes[:, 0] * image_scale,
      boxes[:, 1] * image_scale,
      height * image_scale,
      width * image_scale,
      scores,
      tf.cast(tf.gather(classes, top_detection_idx) + 1, tf.float32)
  ], axis=1)
  return detections
def input_producer(raw_data,
                   batch_size,
                   num_steps,
                   shuffle=False,
                   randomize=False,
                   random_len=False):
    """Produces graph-based input for Penn Treebank.

  Args:
    raw_data: np tensor of size [num_words].
    batch_size: number of sequences per batch.
    num_steps: number of BPTT steps.
    shuffle: whether to shuffle sentences.
    randomize: use random segments instead of the continuous corpus.
    random_len: whether to use a random sequence length.

  Returns:
    If `random_len` is set, returns (x, y, num_batches_per_epoch,
      reset_start_idx, should_reset, base_bptt).
    Otherwise, returns (x, y, num_batches_per_epoch).
  """

    num_batches_per_epoch = (
        (np.size(raw_data) // batch_size) - 1) // num_steps
    raw_data = tf.convert_to_tensor(raw_data, name='raw_data', dtype=tf.int32)

    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size
    data = tf.reshape(raw_data[0:batch_size * batch_len],
                      [batch_size, batch_len])

    epoch_size = (batch_len - 1) // num_steps
    with tf.device('/cpu:0'):
        epoch_size = tf.identity(epoch_size, name='epoch_size')

        if random_len:
            start_idx = tf.Variable(0,
                                    name='start_idx',
                                    dtype=tf.int32,
                                    trainable=False)
            base_bptt = tf.cond(
                tf.random_uniform(shape=(), minval=0., maxval=1.) < 0.95,
                lambda: tf.cast(num_steps, dtype=tf.float32),
                lambda: tf.cast(num_steps, dtype=tf.float32) / 2.)
            seq_len = tf.random.truncated_normal(shape=(),
                                                 mean=base_bptt,
                                                 stddev=5.,
                                                 dtype=tf.float32)
            seq_len = tf.cast(seq_len, dtype=tf.int32)
            seq_len = tf.minimum(seq_len,
                                 num_steps + 20)  # seq_len <= bptt + 20
            seq_len = tf.minimum(seq_len, batch_len - start_idx - 1)
            end_idx = start_idx + seq_len

            x = data[:, start_idx:end_idx]
            y = data[:, start_idx + 1:end_idx + 1]

            with tf.control_dependencies([x, y]):
                with tf.control_dependencies([tf.assign(start_idx, end_idx)]):
                    should_reset = tf.greater_equal(end_idx, batch_len - 3)

            reset_start_idx = tf.assign(start_idx, 0)
            return (x, y, num_batches_per_epoch, reset_start_idx, should_reset,
                    base_bptt)

        if randomize:
            i = tf.random_uniform([1],
                                  minval=0,
                                  maxval=batch_len - num_steps,
                                  dtype=tf.int32)
            x = tf.strided_slice(data, [0, i], [batch_size, i + num_steps])
            y = tf.strided_slice(data, [0, i + 1],
                                 [batch_size, i + num_steps + 1])
        else:
            i = tf.train.range_input_producer(epoch_size,
                                              shuffle=shuffle).dequeue()
            x = tf.strided_slice(data, [0, i * num_steps],
                                 [batch_size, (i + 1) * num_steps])
            y = tf.strided_slice(data, [0, i * num_steps + 1],
                                 [batch_size, (i + 1) * num_steps + 1])
        x.set_shape([batch_size, num_steps])
        y.set_shape([batch_size, num_steps])

        return x, y, num_batches_per_epoch
def flatten(samples):
    """Flatten the input tensor into a vector."""
    return tf.reshape(samples, (tf.size(samples), ))
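A quick check of flatten: a 2x3 tensor becomes a length-6 vector.

import tensorflow as tf

print(flatten(tf.constant([[1, 2, 3], [4, 5, 6]])))  # -> [1 2 3 4 5 6]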
Example #11
def _count_all_pp(x):
    """Count all objects."""
    # Count distribution (thresholded at 15):

    label = tf.math.minimum(tf.size(x["objects"]["type"]) - 1, 8)
    return {"image": x["image"], "label": label}
Example #12
    def train_step(self):
        def step_fn(inputs):
            """Step functon.

      Args:
        inputs: inputs from data iterator

      Returns:
        a set of variables to observe in TensorBoard
      """

            net = self.net
            (all_images, labels), (self.probe_images,
                                   self.probe_labels) = inputs
            assert len(all_images.shape) == 5
            images, self.aug_images = all_images[:, 0], all_images[:, 1]

            self.images, self.labels = images, labels
            batch_size = int(self.batch_size /
                             self.strategy.num_replicas_in_sync)

            logits = net(images,
                         name='model',
                         reuse=tf.AUTO_REUSE,
                         training=True)
            self.logits = logits

            # other losses
            # initialized first to use self.guessed_label for meta step
            xe_loss, cs_loss = self.unsupervised_loss()

            # meta optimization
            weight, eps, meta_loss, meta_acc = self.meta_optimize()

            # Losses w.r.t. the new weight and eps.
            onehot_labels = tf.one_hot(labels, self.dataset.num_classes)
            onehot_labels = tf.cast(onehot_labels, tf.float32)
            eps_k = tf.reshape(eps, [batch_size, 1])

            mixed_labels = tf.math.add(eps_k * onehot_labels,
                                       (1 - eps_k) * self.guessed_label,
                                       name='mixed_labels')
            net_cost = tf.losses.softmax_cross_entropy(
                mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
            # loss with initial weight
            net_loss1 = tf.reduce_mean(net_cost)

            # loss with initial eps
            init_eps = tf.constant([FLAGS.grad_eps_init] * batch_size,
                                   dtype=tf.float32)
            init_eps = tf.reshape(init_eps, (-1, 1))
            init_mixed_labels = tf.math.add(
                init_eps * onehot_labels, (1 - init_eps) * self.guessed_label,
                name='init_mixed_labels')

            net_cost2 = tf.losses.softmax_cross_entropy(
                init_mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
            net_loss2 = tf.reduce_sum(tf.math.multiply(net_cost2, weight))

            net_loss = (net_loss1 + net_loss2) / 2

            net_loss = net_loss + tf.add_n([xe_loss, cs_loss])
            net_loss += net.regularization_loss
            net_loss /= self.strategy.num_replicas_in_sync

            # rescale by gpus
            with tf.control_dependencies(net.updates):
                net_grads = tf.gradients(net_loss, net.trainable_variables)
                minimizer_op = self.optimizer.apply_gradients(
                    zip(net_grads, net.trainable_variables),
                    global_step=self.global_step)

            with tf.control_dependencies([minimizer_op]):
                train_op = self.ema.apply(net.trainable_variables)

            acc_op, acc_update_op = self.acc_func(labels,
                                                  tf.argmax(logits, axis=1))

            with tf.control_dependencies([train_op, acc_update_op]):
                return (tf.identity(net_loss), tf.identity(xe_loss),
                        tf.identity(cs_loss), tf.identity(meta_loss),
                        tf.identity(meta_acc), tf.identity(acc_op),
                        tf.identity(weight), tf.identity(labels))

        # end of parallel
        (pr_net_loss, pr_xe_loss, pr_cs_loss, pr_metaloss, pr_metaacc, pr_acc,
         pr_weight, pr_labels) = self.strategy.experimental_run_v2(
             step_fn,
             args=((next(self.train_input_iterator),
                    next(self.probe_input_iterator)), ))
        # collect device variables
        weights = self.strategy.unwrap(pr_weight)
        weights = tf.concat(weights, axis=0)
        labels = self.strategy.unwrap(pr_labels)
        labels = tf.concat(labels, axis=0)

        mean_acc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_acc)
        mean_metaacc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                            pr_metaacc)
        net_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                        pr_net_loss)
        xe_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_xe_loss)
        cs_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_cs_loss)
        meta_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                         pr_metaloss)

        # The following adds summary variables for TensorBoard visualization.
        merges = []
        merges.append(tf.summary.scalar('acc/train', mean_acc))
        merges.append(tf.summary.scalar('loss/xemin', xe_loss))
        merges.append(tf.summary.scalar('loss/consistency', cs_loss))
        merges.append(tf.summary.scalar('loss/net', net_loss))
        merges.append(tf.summary.scalar('loss/meta', meta_loss))
        merges.append(tf.summary.scalar('acc/meta', mean_metaacc))

        zw_inds = tf.squeeze(
            tf.where(tf.less_equal(weights, 0), name='zero_weight_index'))
        merges.append(
            tf.summary.scalar(
                'weights/zeroratio',
                tf.math.divide(tf.cast(tf.size(zw_inds), tf.float32),
                               tf.cast(tf.size(weights), tf.float32))))

        self.epoch_var = tf.cast(self.global_step / self.iter_epoch,
                                 tf.float32,
                                 name='epoch')
        merges.append(tf.summary.scalar('epoch', self.epoch_var))
        merges.append(tf.summary.scalar('learningrate', self.learning_rate))
        summary = tf.summary.merge(merges)

        return [
            net_loss, meta_loss, xe_loss, cs_loss, mean_acc, mean_metaacc,
            summary, weights
        ]
Example #13
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)

        # Gets original image and its size.
        image = data['image']

        # NOTE: The autoaugment method works best when used alongside the standard
        # horizontal flipping of images along with size jittering and normalization.
        if self._use_autoaugment:
            try:
                from utils import autoaugment_utils  # pylint: disable=g-import-not-at-top
            except ImportError as e:
                logging.exception('Autoaugment is not supported in TF 2.x.')
                raise e

            image, boxes = autoaugment_utils.distort_image_with_autoaugment(
                image, boxes, self._autoaugment_policy_name)

        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes = input_utils.random_horizontal_flip(image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=input_utils.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  (image_height, image_width),
                                                  offset)

        # Filters out ground truth boxes that are all zeros.
        indices = input_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size,
                                     (image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(input_anchor,
                                              self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(
             boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
        }
        return image, labels
Example #14
def GetEmbeddingLookupList(signals_list,
                           embedding_vars,
                           sparse_ids,
                           sparse_weights=None,
                           combiners='sqrtn',
                           partition_strategies='mod'):
    """Get a list of embedding lookup tensors.

  Args:
    signals_list: A list of strings, representing names of features.
    embedding_vars: Dict mapping feature names to full embedding variables.
    sparse_ids: Dict mapping feature names to SparseTensors of their ids.
    sparse_weights: Either None, or a dict mapping feature names to
      SparseTensors of their weights (which can also be None).
    combiners: Either a common combiner type for all features ('mean', 'sqrtn' or
      'sum') or a dict mapping each feature name to a combiner type.
    partition_strategies: Either a common partition_strategy for all features
      ('mod' or 'div') or a dict mapping feature_names to partition_strategies.

  Returns:
    embedding_lookup_list: A list of embedding lookup tensors used for bag of
      words attribution, aligned with signals_list.
  """
    assert isinstance(embedding_vars, dict) and isinstance(sparse_ids, dict)
    assert sparse_weights is None or isinstance(sparse_weights, dict)
    assert combiners in ('mean', 'sqrtn', 'sum') or isinstance(combiners, dict)
    assert (partition_strategies in ('mod', 'div')
            or isinstance(partition_strategies, dict))
    embedding_lookup_list = []
    for signal in signals_list:
        combiner = combiners[signal] if isinstance(combiners,
                                                   dict) else combiners
        partition_strategy = (partition_strategies[signal] if isinstance(
            partition_strategies, dict) else partition_strategies)

        # Batch dimension should be 1 for attribution.
        with tf.control_dependencies(
            [tf.assert_equal(tf.shape(sparse_ids[signal])[0], 1)]):
            embedding_lookup = tf.nn.embedding_lookup(
                params=embedding_vars[signal],
                ids=tf.sparse_tensor_to_dense(sparse_ids[signal]),
                partition_strategy=partition_strategy)
        if sparse_weights is None or sparse_weights[signal] is None:
            num_vals = tf.size(sparse_ids[signal].values)
            if combiner == 'mean':
                embedding_weights = tf.fill([1, num_vals],
                                            1.0 / tf.to_float(num_vals))
            elif combiner == 'sqrtn':
                embedding_weights = tf.fill([1, num_vals], 1.0 /
                                            tf.sqrt(tf.to_float(num_vals)))
            else:
                embedding_weights = tf.ones([1, num_vals], dtype=tf.float32)
        else:
            # Batch dimension should be 1 for attribution.
            with tf.control_dependencies(
                [tf.assert_equal(tf.shape(sparse_weights[signal])[0], 1)]):
                dense_weights = tf.sparse_tensor_to_dense(
                    sparse_weights[signal])
            if combiner == 'mean':
                embedding_weights = dense_weights / tf.reduce_sum(
                    dense_weights)
            elif combiner == 'sqrtn':
                embedding_weights = (
                    dense_weights /
                    tf.sqrt(tf.reduce_sum(tf.pow(dense_weights, 2))))
            else:
                embedding_weights = dense_weights
        embedding_lookup *= tf.expand_dims(embedding_weights, -1)
        embedding_lookup_list.append(embedding_lookup)
    return embedding_lookup_list
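A hypothetical call sketch with a single made-up feature ('query_tokens') and a dense embedding table; the sparse ids carry a batch dimension of 1, as the in-function assertions require, and the TF1-style graph APIs used above (tf.sparse_tensor_to_dense, tf.to_float) are assumed.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

embedding_vars = {'query_tokens': tf.Variable(tf.random.normal([1000, 16]))}
sparse_ids = {'query_tokens': tf.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2]], values=[12, 7, 512], dense_shape=[1, 3])}

lookups = GetEmbeddingLookupList(
    signals_list=['query_tokens'],
    embedding_vars=embedding_vars,
    sparse_ids=sparse_ids,
    combiners='sqrtn')  # one [1, 3, 16] tensor per feature in signals_list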
Example #15
    def _update_mask(self, weights, threshold, gradients):  # pylint: disable=unused-argument
        """Updates the mask for a given weight tensor.

    This functions first computes the cdf of the weight tensor, and estimates
    the threshold value such that 'desired_sparsity' fraction of weights
    have magnitude less than the threshold.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold.
      gradients: The gradient tensor that is used for salience calculation.

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if sparsity is not defined
    """
        if self._sparsity is None:
            raise ValueError('Sparsity variable undefined')

        sparsity = self._get_sparsity(weights.op.name)
        with tf.name_scope(weights.op.name + '_pruning_ops'):
            tf.logging.info('Applying option %s pruning',
                            self._spec.prune_option)
            if self._spec.prune_option == 'weight':
                abs_weights = tf.abs(weights)
            elif self._spec.prune_option in ('first_order_gradient',
                                             'second_order_gradient'):
                if gradients is None:
                    raise ValueError('gradient tensor cannot be None.')
                # gradient variable stores absolute value already
                abs_weights = tf.multiply(tf.abs(weights), gradients)
            else:
                raise ValueError('undefined option')

            k = tf.cast(
                tf.round(
                    tf.cast(tf.size(abs_weights), tf.float32) *
                    (1 - sparsity)), tf.int32)

            # Generate a random shuffling of the weights s.t. the tie-breaker on
            # weight magnitude is random uniform.
            shuffling = tf.random_shuffle(tf.range(tf.size(abs_weights)))
            shuffling = tf.reshape(shuffling, [-1, 1])

            # Flatten the weights and scatter the values randomly.
            abs_weights = tf.reshape(abs_weights, [-1])
            abs_weights = tf.scatter_nd(shuffling, abs_weights,
                                        tf.shape(abs_weights))

            # Sort the entire array
            _, indices = tf.nn.top_k(abs_weights, k=tf.size(abs_weights))

            # `k` is how many non-zero weights we're going to have. Create a new
            # mask where the first `k` elements are set to one and all others are
            # set to zero.
            mask_staging = tf.range(tf.size(abs_weights))
            mask_staging = tf.cast(tf.less(mask_staging, k), tf.float32)

            # Scatter the mask back into the proper positions for the weight matrix.
            indices = tf.reshape(indices, [-1, 1])
            new_mask = tf.scatter_nd(indices, mask_staging,
                                     tf.shape(mask_staging))

            # Un-shuffle the newly created mask.
            new_mask = tf.reshape(tf.gather_nd(new_mask, shuffling),
                                  tf.shape(weights))
        return tf.constant(0, tf.float32), new_mask
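A standalone sketch (separate from the class method above) of the same masking idea: keep the k largest-magnitude entries of a weight tensor, breaking ties uniformly at random by shuffling before the top-k sort and un-shuffling the resulting mask.

import tensorflow as tf

weights = tf.random.normal([4, 4])
sparsity = 0.75  # drop 75% of the entries, keep the top 25% by magnitude

abs_flat = tf.reshape(tf.abs(weights), [-1])
size = tf.size(abs_flat)
k = tf.cast(tf.round(tf.cast(size, tf.float32) * (1 - sparsity)), tf.int32)

# Random shuffle so ties in magnitude are broken uniformly at random.
shuffling = tf.reshape(tf.random.shuffle(tf.range(size)), [-1, 1])
shuffled = tf.scatter_nd(shuffling, abs_flat, tf.shape(abs_flat))

# Mark the k largest entries of the shuffled vector, then un-shuffle the mask.
_, indices = tf.math.top_k(shuffled, k=size)
mask = tf.cast(tf.less(tf.range(size), k), tf.float32)
mask = tf.scatter_nd(tf.reshape(indices, [-1, 1]), mask, tf.shape(shuffled))
mask = tf.reshape(tf.gather_nd(mask, shuffling), tf.shape(weights))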
Example #16
    def decode(self, tf_seq_example_string_tensor):
        """Decodes serialized `tf.SequenceExample`s and returns a tensor dictionary.

    Args:
      tf_seq_example_string_tensor: a string tensor holding a serialized
        `tf.SequenceExample`.

    Returns:
      A dictionary with (at least) the following tensors:
      fields.InputDataFields.source_id: a [num_frames] string tensor with a
        unique ID for each frame.
      fields.InputDataFields.num_groundtruth_boxes: a [num_frames] int32 tensor
        specifying the number of boxes in each frame.
      fields.InputDataFields.groundtruth_boxes: a [num_frames, num_boxes, 4]
        float32 tensor with bounding boxes for each frame. Note that num_boxes
        is the maximum boxes seen in any individual frame. Any frames with fewer
        boxes are padded with 0.0.
      fields.InputDataFields.groundtruth_classes: a [num_frames, num_boxes]
        int32 tensor with class indices for each box in each frame.
      fields.InputDataFields.groundtruth_weights: a [num_frames, num_boxes]
        float32 tensor with weights of the groundtruth boxes.
      fields.InputDataFields.is_annotated: a [num_frames] bool tensor specifying
        whether the image was annotated or not. If False, the corresponding
        entries in the groundtruth tensor will be ignored.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
      fields.InputDataFields.image: a [num_frames] string tensor with
        the encoded images.
    """
        serialized_example = tf.reshape(tf_seq_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFSequenceExampleDecoder(
            self._context_keys_to_features,
            self._sequence_keys_to_feature_lists, self._items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(list(zip(keys, tensors)))
        tensor_dict[fields.InputDataFields.groundtruth_boxes].set_shape(
            [None, None, 4])
        tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.cast(
            tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            dtype=tf.int32)
        tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.cast(
            tensor_dict[fields.InputDataFields.groundtruth_classes],
            dtype=tf.int32)
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.cast(
                tf.stack([
                    tensor_dict[fields.InputDataFields.image_height],
                    tensor_dict[fields.InputDataFields.image_width]
                ]),
                dtype=tf.int32)
        tensor_dict.pop(fields.InputDataFields.image_height)
        tensor_dict.pop(fields.InputDataFields.image_width)

        def default_groundtruth_weights():
            """Produces weights of 1.0 for each valid box, and 0.0 otherwise."""
            num_boxes_per_frame = tensor_dict[
                fields.InputDataFields.num_groundtruth_boxes]
            max_num_boxes = tf.reduce_max(num_boxes_per_frame)
            num_boxes_per_frame_tiled = tf.tile(
                tf.expand_dims(num_boxes_per_frame, axis=-1),
                multiples=tf.stack([1, max_num_boxes]))
            range_tiled = tf.tile(tf.expand_dims(tf.range(max_num_boxes),
                                                 axis=0),
                                  multiples=tf.stack(
                                      [tf.shape(num_boxes_per_frame)[0], 1]))
            return tf.cast(tf.greater(num_boxes_per_frame_tiled, range_tiled),
                           tf.float32)

        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.size(
                    tensor_dict[fields.InputDataFields.groundtruth_weights]),
                0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if self._fully_annotated:
            tensor_dict[fields.InputDataFields.is_annotated] = tf.ones_like(
                tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
                dtype=tf.bool)
        else:
            tensor_dict[fields.InputDataFields.is_annotated] = tf.cast(
                tensor_dict[fields.InputDataFields.is_annotated],
                dtype=tf.bool)

        return tensor_dict
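
# --- Usage sketch (not from the original decoder): a standalone, eager
# demonstration of the tile/range trick used by default_groundtruth_weights.
# The per-frame box counts below are made up; the pattern turns them into a
# padded [num_frames, max_num_boxes] mask of 1.0 for real boxes, 0.0 for padding.
import tensorflow as tf

num_boxes_per_frame = tf.constant([2, 0, 3], dtype=tf.int32)
max_num_boxes = tf.reduce_max(num_boxes_per_frame)
counts = tf.tile(tf.expand_dims(num_boxes_per_frame, axis=-1),
                 multiples=tf.stack([1, max_num_boxes]))
positions = tf.tile(tf.expand_dims(tf.range(max_num_boxes), axis=0),
                    multiples=tf.stack([tf.shape(num_boxes_per_frame)[0], 1]))
weights = tf.cast(tf.greater(counts, positions), tf.float32)
print(weights.numpy())
# [[1. 1. 0.]
#  [0. 0. 0.]
#  [1. 1. 1.]]
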
Example #17
0
def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

    Arguments:
      data: A `Tensor` of any shape. Must be castable to `float64`.
      bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
    Returns:
      A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
      a triple `[left_edge, right_edge, count]` for a single bucket.
      The value of `k` is either `bucket_count` or `1` or `0`.
    """
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    if bucket_count is None:
        bucket_count = summary_v2.DEFAULT_BUCKET_COUNT
    with tf.name_scope("buckets",
                       values=[data, bucket_count]), tf.control_dependencies([
                           tf.assert_scalar(bucket_count),
                           tf.assert_type(bucket_count, tf.int32)
                       ]):
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        is_empty = tf.equal(tf.size(input=data), 0)

        def when_empty():
            return tf.constant([], shape=(0, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Use float64 instead of float32 to avoid accumulating floating point error
                # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
                # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
                one_hots = tf.one_hot(clamped_indices,
                                      depth=bucket_count,
                                      dtype=tf.float64)
                bucket_counts = tf.cast(
                    tf.reduce_sum(input_tensor=one_hots, axis=0),
                    dtype=tf.float64,
                )
                edges = tf.linspace(min_, max_, bucket_count + 1)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)

        return tf.cond(is_empty, when_empty, when_nonempty)
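
# --- Usage sketch (assumed setup, not part of the original module): run
# _buckets on a handful of values in TF1 graph mode to see the [k, 3] rows of
# [left_edge, right_edge, count].
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
data = tf.constant([0.0, 0.1, 0.4, 0.9, 1.0])
buckets = _buckets(data, bucket_count=2)
with tf.Session() as sess:
    print(sess.run(buckets))
# Roughly [[0.0, 0.5, 3.0], [0.5, 1.0, 2.0]]: three values fall in the first
# half of the range and two in the second.
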
def get_iterator(src_dataset,
                 tgt_dataset,
                 src_vocab_table,
                 tgt_vocab_table,
                 batch_size,
                 global_batch_size,
                 sos,
                 eos,
                 random_seed,
                 num_buckets,
                 src_max_len=None,
                 tgt_max_len=None,
                 num_parallel_calls=4,
                 output_buffer_size=None,
                 skip_count=None,
                 num_shards=1,
                 shard_index=0,
                 reshuffle_each_iteration=True,
                 filter_oversized_sequences=False,
                 return_raw=False):
    """Function that returns input dataset."""
    # Total number of examples in src_dataset/tgt_dataset
    if not output_buffer_size:
        output_buffer_size = global_batch_size * 100

    src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(eos)), tf.int32)

    tgt_sos_id = tf.cast(tgt_vocab_table.lookup(tf.constant(sos)), tf.int32)
    tgt_eos_id = tf.cast(tgt_vocab_table.lookup(tf.constant(eos)), tf.int32)

    src_tgt_dataset = tf.data.Dataset.zip((src_dataset, tgt_dataset))

    src_tgt_dataset = src_tgt_dataset.shard(num_shards, shard_index)
    if skip_count is not None:
        src_tgt_dataset = src_tgt_dataset.skip(skip_count)

    src_tgt_dataset = src_tgt_dataset.map(
        lambda src, tgt:
        (tf.string_split([src]).values, tf.string_split([tgt]).values),
        num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size)

    # Filter zero length input sequences.
    src_tgt_dataset = src_tgt_dataset.filter(
        lambda src, tgt: tf.logical_and(tf.size(src) > 0,
                                        tf.size(tgt) > 0))

    # Filter oversized input sequences (542 examples are filtered).
    if filter_oversized_sequences:
        src_tgt_dataset = src_tgt_dataset.filter(
            lambda src, tgt: tf.logical_and(
                tf.size(src) <= src_max_len - 2,
                tf.size(tgt) <= tgt_max_len - 1))

    if src_max_len:
        src_tgt_dataset = src_tgt_dataset.map(
            lambda src, tgt: (src[:src_max_len - 2], tgt),
            num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size)
    if tgt_max_len:
        src_tgt_dataset = src_tgt_dataset.map(
            lambda src, tgt: (src, tgt[:tgt_max_len]),
            num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size)

    # Convert the word strings to ids.  Word strings that are not in the
    # vocab get the lookup table's default_value integer.
    src_tgt_dataset = src_tgt_dataset.map(
        lambda src, tgt: (tf.cast(src_vocab_table.lookup(src), tf.int32),
                          tf.cast(tgt_vocab_table.lookup(tgt), tf.int32)),
        num_parallel_calls=num_parallel_calls)

    src_tgt_dataset = src_tgt_dataset.prefetch(output_buffer_size)
    # Create a tgt_input prefixed with <sos> and a tgt_output suffixed with <eos>.
    src_tgt_dataset = src_tgt_dataset.map(lambda src, tgt: (tf.concat(
        ([tgt_sos_id], src, [src_eos_id]), 0), tf.concat(
            ([tgt_sos_id], tgt), 0), tf.concat((tgt, [tgt_eos_id]), 0)),
                                          num_parallel_calls=num_parallel_calls
                                          ).prefetch(output_buffer_size)
    # Add in sequence lengths.
    src_tgt_dataset = src_tgt_dataset.map(
        lambda src, tgt_in, tgt_out:
        (src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in)),
        num_parallel_calls=num_parallel_calls)
    if return_raw:

        def map_fn(src, tgt_in, tgt_out, src_len, tgt_len):
            """Pad the dataset and emit the bucket id as key."""
            src = tf.pad(src, [[0, src_max_len - tf.size(src)]],
                         constant_values=src_eos_id)
            tgt_in = tf.pad(tgt_in, [[0, tgt_max_len - tf.size(tgt_in)]],
                            constant_values=tgt_eos_id)
            tgt_out = tf.pad(tgt_out, [[0, tgt_max_len - tf.size(tgt_out)]],
                             constant_values=tgt_eos_id)
            bucket_width = (src_max_len + num_buckets - 1) // num_buckets
            bucket_id = tf.cast(
                tf.minimum(
                    num_buckets,
                    tf.maximum(src_len // bucket_width,
                               tgt_len // bucket_width)), tf.int32)
            return tf.concat([
                src, tgt_in, tgt_out,
                tf.reshape(src_len, [1]),
                tf.reshape(tgt_len, [1]),
                tf.reshape(bucket_id, [1])
            ], 0)

        src_tgt_dataset = src_tgt_dataset.map(
            map_fn, num_parallel_calls=num_parallel_calls)
        return src_tgt_dataset.batch(1024)

    src_tgt_dataset = src_tgt_dataset.prefetch(output_buffer_size)

    src_tgt_dataset = src_tgt_dataset.cache()
    # TODO(saeta): investigate shuffle_and_repeat.
    src_tgt_dataset = src_tgt_dataset.shuffle(
        output_buffer_size, random_seed, reshuffle_each_iteration).repeat()

    # Bucket by source sequence length (buckets for lengths 0-9, 10-19, ...)
    def batching_func(x):
        return x.padded_batch(
            batch_size,
            # The first three entries are the source and target line rows;
            # these have unknown-length vectors.  The last two entries are
            # the source and target row sizes; these are scalars.
            padded_shapes=(
                tf.TensorShape([src_max_len]),  # src
                tf.TensorShape([tgt_max_len]),  # tgt_input
                tf.TensorShape([tgt_max_len]),  # tgt_output
                tf.TensorShape([]),  # src_len
                tf.TensorShape([])),  # tgt_len
            # Pad the source and target sequences with eos tokens.
            # (Though notice we don't generally need to do this since
            # later on we will be masking out calculations past the true sequence.)
            padding_values=(
                src_eos_id,  # src
                tgt_eos_id,  # tgt_input
                tgt_eos_id,  # tgt_output
                0,  # src_len -- unused
                0),  # tgt_len -- unused
            # For TPU, must set drop_remainder to True or batch size will be None.
            drop_remainder=True)

    if num_buckets > 1:

        def key_func(unused_1, unused_2, unused_3, src_len, tgt_len):
            """Calculate bucket_width by maximum source sequence length."""
            # Pairs with length [0, bucket_width) go to bucket 0, length
            # [bucket_width, 2 * bucket_width) go to bucket 1, etc.  Pairs with length
            # over ((num_buckets - 1) * bucket_width) words all go into the last bucket.
            if src_max_len:
                bucket_width = (src_max_len + num_buckets - 1) // num_buckets
            else:
                bucket_width = 10

            # Bucket sentence pairs by the length of their source sentence and target
            # sentence.
            bucket_id = tf.maximum(src_len // bucket_width,
                                   tgt_len // bucket_width)
            return tf.to_int64(tf.minimum(num_buckets, bucket_id))

        def reduce_func(unused_key, windowed_data):
            return batching_func(windowed_data)

        batched_dataset = src_tgt_dataset.apply(
            tf.data.experimental.group_by_window(
                key_func=key_func,
                reduce_func=reduce_func,
                window_size=global_batch_size))
    else:
        batched_dataset = batching_func(src_tgt_dataset)

    # make_one_shot_iterator is not applicable here since we have a lookup table.
    # Instead, return a tf.data.Dataset and let TPUEstimator initialize it and
    # build the iterator from it.
    batched_dataset = batched_dataset.map(
        lambda src, tgt_in, tgt_out, source_size, tgt_in_size:
        ({
            "source": src,
            "target_input": tgt_in,
            "target_output": tgt_out,
            "source_sequence_length": source_size,
            "target_sequence_length": tgt_in_size
        }))
    return batched_dataset
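
# --- Toy, standalone sketch (made-up sentences, not the NMT pipeline above) of
# the same group_by_window idea: elements are keyed by token count //
# bucket_width and padded-batched within their bucket.
import tensorflow as tf

bucket_width = 2
ds = tf.data.Dataset.from_tensor_slices(
    tf.constant(["a b", "a b c d", "a", "a b c", "a b c d e"]))
ds = ds.map(lambda s: tf.strings.split([s]).values)

def toy_key_func(tokens):
    return tf.cast(tf.size(tokens) // bucket_width, tf.int64)

def toy_reduce_func(unused_key, windowed_data):
    return windowed_data.padded_batch(2, padded_shapes=tf.TensorShape([None]))

batched = ds.apply(
    tf.data.experimental.group_by_window(
        key_func=toy_key_func, reduce_func=toy_reduce_func, window_size=2))
for batch in batched:
    print(batch.numpy())  # each batch holds sentences of similar token count
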
Example #19
0
  def train_step(self):

    def step_fn(inputs):
      """Step function."""

      net = self.net
      (images, labels), (self.probe_images, self.probe_labels) = inputs
      self.images, self.labels = images, labels

      logits = net(images, name='model', reuse=tf.AUTO_REUSE, training=True)
      self.logits = logits

      net_cost = tf.losses.sparse_softmax_cross_entropy(
          labels, logits, reduction=tf.losses.Reduction.NONE)
      weight, meta_loss, meta_acc = self.meta_optimize(net_cost)

      net_loss = tf.reduce_sum(tf.math.multiply(net_cost, weight))
      net_loss += net.regularization_loss
      net_loss /= self.strategy.num_replicas_in_sync
      # rescale by gpus
      net_grads = tf.gradients(net_loss, net.trainable_variables)
      minimizer_op = self.optimizer.apply_gradients(
          zip(net_grads, net.trainable_variables), global_step=self.global_step)
      if FLAGS.use_ema:
        ema_op = self.ema.apply(net.trainable_variables)
        optimizer_op = tf.group([net.updates, minimizer_op, ema_op])
      else:
        optimizer_op = tf.group([net.updates, minimizer_op])
      acc_op, acc_update_op = self.acc_func(labels, tf.argmax(logits, axis=1))

      with tf.control_dependencies([optimizer_op, acc_update_op]):
        return tf.identity(net_loss), tf.identity(meta_loss),\
               tf.identity(meta_acc), tf.identity(acc_op),\
               tf.identity(weight), tf.identity(labels)

    # end of parallel
    (pr_net_loss, pr_metaloss, pr_metaacc, pr_acc,
     pr_weight, pr_labels) = self.strategy.run(
         step_fn, args=(next(self.train_input_iterator),))

    # collect device variables
    weights = self.strategy.unwrap(pr_weight)
    weights = tf.concat(weights, axis=0)
    labels = self.strategy.unwrap(pr_labels)
    labels = tf.concat(labels, axis=0)

    mean_acc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_acc)
    mean_metaacc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_metaacc)
    net_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_net_loss)
    meta_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_metaloss)

    merges = []
    merges.append(tf.summary.scalar('acc/train', mean_acc))
    merges.append(tf.summary.scalar('loss/net', net_loss))
    merges.append(tf.summary.scalar('loss/meta', meta_loss))
    merges.append(tf.summary.scalar('acc/meta', mean_metaacc))

    zw_inds = tf.squeeze(
        tf.where(tf.less_equal(weights, 0), name='zero_weight_index'))
    merges.append(
        tf.summary.scalar(
            'weights/zeroratio',
            tf.math.divide(
                tf.cast(tf.size(zw_inds), tf.float32),
                tf.cast(tf.size(weights), tf.float32))))

    self.epoch_var = tf.cast(
        self.global_step / self.iter_epoch, tf.float32, name='epoch')
    merges.append(tf.summary.scalar('epoch', self.epoch_var))
    merges.append(tf.summary.scalar('learningrate', self.learning_rate))
    summary = tf.summary.merge(merges)

    return [net_loss, meta_loss, mean_acc, mean_metaacc, summary, weights]
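
# --- Minimal eager sketch (made-up weights) of the zero-weight ratio computed
# for the 'weights/zeroratio' summary above: the fraction of example weights
# that are <= 0.
import tensorflow as tf

weights = tf.constant([0.3, 0.0, 0.7, -0.1, 0.0])
zw_inds = tf.squeeze(tf.where(tf.less_equal(weights, 0)))
zero_ratio = tf.math.divide(tf.cast(tf.size(zw_inds), tf.float32),
                            tf.cast(tf.size(weights), tf.float32))
print(zero_ratio.numpy())  # 0.6 -- three of the five weights are <= 0
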
def get_infer_iterator(dataset_data,
                       dataset_kb,
                       vocab_table,
                       batch_size,
                       eod,
                       len_action,
                       output_buffer_size=None,
                       skip_count=None,
                       num_shards=1,
                       shard_index=0,
                       self_play=False):
    """can be used to generate inference or self play iterators."""
    if not output_buffer_size:
        output_buffer_size = batch_size * 1000
    eod_id = tf.cast(vocab_table.lookup(tf.constant(eod)),
                     tf.int32)  # for padding

    combined_dataset = tf.data.Dataset.zip((dataset_data, dataset_kb))
    combined_dataset = combined_dataset.shard(num_shards, shard_index)

    if skip_count is not None:
        combined_dataset = combined_dataset.skip(skip_count)

    # Do not shuffle in inference and self-play mode;
    # data is shuffled outside of the iterator.
    combined_dataset = combined_dataset.filter(
        lambda data, kb: tf.logical_and(tf.size(data) > 0,
                                        tf.size(kb) > 0))

    if not self_play:
        get_sub_fu = get_sub_items_infer
        process_entry_fn = partial(process_entry_infer,
                                   vocab_table=vocab_table)
    else:
        get_sub_fu = get_sub_items_self_play
        process_entry_fn = partial(process_entry_self_play,
                                   vocab_table=vocab_table)

    combined_dataset = combined_dataset.map(get_sub_fu)
    combined_dataset = combined_dataset.map(process_entry_fn)

    def batching_func(x):
        return x.padded_batch(
            batch_size,
            padded_shapes=(
                tf.TensorShape([None]),  # intent
                tf.TensorShape([]),  # intent_len
                tf.TensorShape([None]),  # source dialogue
                tf.TensorShape([None]),  # target dialogue
                tf.TensorShape([]),  # dialogue_len
                tf.TensorShape([len_action]),  # predicted action
                tf.TensorShape([]),  # action_len
                tf.TensorShape([len_action]),  # truth action
                tf.TensorShape([None]),  # reward diag
                tf.TensorShape([len_action]),  # reward action
                tf.TensorShape([None]),  # kb
                tf.TensorShape([]),  # kb_len
                tf.TensorShape([None]),  # mask1
                tf.TensorShape([None]),  # mask2
                tf.TensorShape([None]),  # turn_point
            ),
            padding_values=(
                eod_id,  # intent
                0,  # intent_len
                eod_id,  # source dialogue
                eod_id,  # target dialogue
                0,  # dialogue_len
                eod_id,  # predicted action
                0,  # action_len
                eod_id,  # truth action
                0.0,  # reward diag
                0.0,  # reward action
                eod_id,  # kb
                0,  # kb_len
                False,  # mask1
                False,  # mask2
                0.0)  # turn_point
        )

    batched_dataset = batching_func(combined_dataset)

    batched_iter = tf.data.make_initializable_iterator(batched_dataset)
    return batched_iter
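
# --- Tiny standalone sketch (toy two-component tuples instead of the
# 15-component dialogue tuples) of the padded_batch call in batching_func:
# variable-length components are padded with an explicit value, scalar
# components are left untouched.
import tensorflow as tf

lengths = tf.data.Dataset.from_tensor_slices(tf.constant([2, 1, 3]))
sequences = lengths.map(lambda n: tf.range(n, dtype=tf.int32))
ds = tf.data.Dataset.zip((sequences, lengths))
batched = ds.padded_batch(
    2,
    padded_shapes=(tf.TensorShape([None]),  # variable-length sequence
                   tf.TensorShape([])),     # scalar length
    padding_values=(tf.constant(-1),        # pad value for sequences
                    tf.constant(0)))        # unused for scalar lengths
for seq_batch, len_batch in batched:
    print(seq_batch.numpy(), len_batch.numpy())
# The first batch pads the shorter sequence with -1: [[0 1] [0 -1]], lengths [2 1].
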
Example #21
0
def run_box_to_gaussian(logdir, verbose=False):
    """Run a box-blur-to-Gaussian-blur demonstration.

    See the summary description for more details.

    Arguments:
      logdir: Directory into which to write event logs.
      verbose: Boolean; whether to log any output.
    """
    if verbose:
        logger.info("--- Starting run: box_to_gaussian")

    tf.reset_default_graph()
    tf.set_random_seed(0)

    image = get_image(verbose=verbose)
    blur_radius = tf.placeholder(shape=(), dtype=tf.int32)
    with tf.name_scope("filter"):
        blur_side_length = blur_radius * 2 + 1
        pixel_filter = tf.ones((blur_side_length, blur_side_length))
        pixel_filter = pixel_filter / tf.cast(tf.size(input=pixel_filter),
                                              tf.float32)  # normalize

    iterations = 4
    images = [tf.cast(image, tf.float32) / 255.0]
    for _ in xrange(iterations):
        images.append(convolve(images[-1], pixel_filter))
    with tf.name_scope("convert_to_uint8"):
        images = tf.stack([
            tf.cast(255 * tf.clip_by_value(image_, 0.0, 1.0), tf.uint8)
            for image_ in images
        ])

    summ = image_summary.op(
        "box_to_gaussian",
        images,
        max_outputs=iterations,
        display_name="Gaussian blur as a limit process of box blurs",
        description=(
            "Demonstration of forming a Gaussian blur by "
            "composing box blurs, each of which can be expressed "
            "as a 2D convolution.\n\n"
            "A Gaussian blur is formed by convolving a Gaussian "
            "kernel over an image. But a Gaussian kernel is "
            "itself the limit of convolving a constant kernel "
            "with itself many times. Thus, while applying "
            "a box-filter convolution just once produces "
            "results that are noticeably different from those "
            "of a Gaussian blur, repeating the same convolution "
            "just a few times causes the result to rapidly "
            "converge to an actual Gaussian blur.\n\n"
            "Here, the step value controls the blur radius, "
            "and the image sample controls the number of times "
            "that the convolution is applied (plus one). "
            "So, when *sample*=1, the original image is shown; "
            "*sample*=2 shows a box blur; and a hypothetical "
            "*sample*=&infin; would show a true Gaussian blur.\n\n"
            "This is one ingredient in a recipe to compute very "
            "fast Gaussian blurs. The other pieces require "
            "special treatment for the box blurs themselves "
            "(decomposition to dual one-dimensional box blurs, "
            "each of which is computed with a sliding window); "
            "we don&rsquo;t perform those optimizations here.\n\n"
            "[Here are some slides describing the full process.]"
            "(%s)\n\n"
            "%s" % (
                "http://elynxsdk.free.fr/ext-docs/Blur/Fast_box_blur.pdf",
                IMAGE_CREDIT,
            )),
    )

    with tf.Session() as sess:
        sess.run(image.initializer)
        writer = tf.summary.FileWriter(os.path.join(logdir, "box_to_gaussian"))
        writer.add_graph(sess.graph)
        for step in xrange(8):
            if verbose:
                logger.info("--- box_to_gaussian: step: %s" % step)
            feed_dict = {blur_radius: step}
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            s = sess.run(
                summ,
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata,
            )
            writer.add_summary(s, global_step=step)
            writer.add_run_metadata(run_metadata, "step_%04d" % step)
        writer.close()
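
# --- Minimal eager sketch (assumed radius value) of the normalized box filter
# built in the "filter" name scope above: a (2r + 1) x (2r + 1) kernel whose
# entries sum to 1.
import tensorflow as tf

blur_radius = 1
blur_side_length = blur_radius * 2 + 1
pixel_filter = tf.ones((blur_side_length, blur_side_length))
pixel_filter = pixel_filter / tf.cast(tf.size(pixel_filter), tf.float32)
print(pixel_filter.numpy())  # a 3x3 kernel filled with 1/9
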
def get_iterator(dataset_data,
                 dataset_kb,
                 vocab_table,
                 batch_size,
                 t1,
                 t2,
                 eod,
                 len_action,
                 random_seed,
                 num_buckets,
                 max_dialogue_len=None,
                 output_buffer_size=None,
                 skip_count=None,
                 num_shards=1,
                 shard_index=0):
    """can be used to generate supervised learning iterators."""
    if not output_buffer_size:
        output_buffer_size = batch_size * 1000
    eod_id = tf.cast(vocab_table.lookup(tf.constant(eod)), tf.int32)
    t1_id = tf.cast(vocab_table.lookup(tf.constant(t1)), tf.int32)
    t2_id = tf.cast(vocab_table.lookup(tf.constant(t2)), tf.int32)

    combined_dataset = tf.data.Dataset.zip((dataset_data, dataset_kb))
    combined_dataset = combined_dataset.shard(num_shards, shard_index)

    if skip_count is not None:
        combined_dataset = combined_dataset.skip(skip_count)

    combined_dataset = combined_dataset.shuffle(output_buffer_size,
                                                random_seed)
    combined_dataset = combined_dataset.filter(
        lambda data, kb: tf.logical_and(tf.size(data) > 0,
                                        tf.size(kb) > 0))

    combined_dataset = combined_dataset.map(get_sub_items_supervised)
    combined_dataset = combined_dataset.map(
        partial(process_entry_supervised,
                vocab_table=vocab_table,
                t1_id=t1_id,
                t2_id=t2_id))

    def batching_func(x):
        return x.padded_batch(
            batch_size,
            padded_shapes=(
                tf.TensorShape([None]),  # intent
                tf.TensorShape([]),  # intent_len
                tf.TensorShape([None]),  # source dialogue
                tf.TensorShape([None]),  # target dialogue
                tf.TensorShape([]),  # dialogue_len
                tf.TensorShape([len_action]),  # action
                tf.TensorShape([]),  # action_len
                tf.TensorShape([len_action]),  # pred_action
                tf.TensorShape([None]),  # reward_diag
                tf.TensorShape([len_action]),  # reward_action
                tf.TensorShape([None]),  # kb
                tf.TensorShape([]),  # kb_len
                tf.TensorShape([None]),  # mask1
                tf.TensorShape([None]),  # mask2
                tf.TensorShape([None]),  # turn_point
            ),
            padding_values=(
                eod_id,  # intent
                0,  # intent_len
                eod_id,  # source
                eod_id,  # target
                0,  # diag len
                eod_id,  # action
                0,  # action len
                eod_id,  # pred_action
                0.0,  # reward diag
                0.0,  # reward action
                eod_id,  # kb
                0,  # kb len
                False,  # mask 1
                False,  # mask 2
                0.0)  # turn point
        )

    if num_buckets > 1:

        def key_func(unused_1, unused_2, unused_3, unused_4, dialogue_len,
                     unused_6, unused_7, unused_8, unused_9, unused_10,
                     unused_11, unused_12, unused_13, unused_14, unused_15):
            bucket_width = (max_dialogue_len + num_buckets - 1) // num_buckets
            bucket_id = dialogue_len // bucket_width
            return tf.to_int64(tf.minimum(num_buckets, bucket_id))

        def reduce_func(unused_key, windowed_data):
            return batching_func(windowed_data)

        batched_dataset = combined_dataset.apply(
            contrib.data.group_by_window(key_func=key_func,
                                         reduce_func=reduce_func,
                                         window_size=batch_size))

    else:
        batched_dataset = batching_func(combined_dataset)

    batched_iter = tf.data.make_initializable_iterator(batched_dataset)
    return batched_iter
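
# --- Tiny worked example (made-up numbers) of the bucket arithmetic in
# key_func above: with max_dialogue_len = 20 and num_buckets = 4 the bucket
# width is 5, so a dialogue of length 12 lands in bucket 2.
import tensorflow as tf

max_dialogue_len, num_buckets = 20, 4
bucket_width = (max_dialogue_len + num_buckets - 1) // num_buckets  # 5
dialogue_len = tf.constant(12)
bucket_id = tf.minimum(num_buckets, dialogue_len // bucket_width)
print(bucket_width, bucket_id.numpy())  # 5 2
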
Example #23
0
def real_svg_top(body_output,
                 unused_targets,
                 model_hparams,
                 unused_vocab_size,
                 hard=False):
    """Applies the Mixture Density Network on top of the LSTM outputs.

  Args:
    body_output: outputs from LSTM with shape [batch, seqlen, 1, hidden_size]
    unused_targets: what the ground truth SVG outputted should be (unused).
    model_hparams: hyper-parameters, should include num_mixture,
      mix_temperature, and gauss_temperature.
    unused_vocab_size: unused
    hard: whether to force predict mode functionality, or return all MDN
      components

  Returns:
    The MDN output. Could be shape [batch, seqlen, 1, 10] if in predict mode
      (or hard=True) or shape [batch, seqlen, 1, 4 + 6 * num_mix * 3], in train.
  """
    # mixture of gaussians for 6 args plus 4 extra states for cmds
    num_mix = model_hparams.num_mixture
    nout = 4 + 6 * num_mix * 3

    # the 'hard' option is meant to be used if 'top' is called within body
    with tf.variable_scope('real_top', reuse=tf.AUTO_REUSE):
        ret = tf.layers.dense(body_output, nout, name='top')
        batch_size = common_layers.shape_list(ret)[0]

        if hard or model_hparams.mode == tf.estimator.ModeKeys.PREDICT:
            temperature = model_hparams.mix_temperature

            # apply temperature, do softmax
            command = tf.identity(ret[:, :, :, :4]) / temperature
            command = tf.exp(command -
                             tf.reduce_max(command, axis=[-1], keepdims=True))
            command = command / tf.reduce_sum(
                command, axis=[-1], keepdims=True)

            # sample from the given probs; this is the same as get_pi_idx,
            # and already returns a hard (non-soft) sample
            command = tf.distributions.Categorical(probs=command).sample()
            # this is now [batch, seq, 1], need to make it one_hot
            command = tf.one_hot(command, 4)

            arguments = ret[:, :, :, 4:]
            # args are [batch, seq, 1, 6*3*num_mix]. want [batch * seq * 6, 3*num_mix]
            arguments = tf.reshape(arguments, [-1, 3 * num_mix])

            out_logmix, out_mean, out_logstd = _get_mdn_coef(arguments)
            # these are [batch*seq*6, num_mix]

            # apply temp to logmix
            out_logmix = tf.identity(out_logmix) / temperature
            out_logmix = tf.exp(
                out_logmix -
                tf.reduce_max(out_logmix, axis=[-1], keepdims=True))
            out_logmix = out_logmix / tf.reduce_sum(
                out_logmix, axis=[-1], keepdims=True)
            # get_pi_idx
            out_logmix = tf.distributions.Categorical(
                probs=out_logmix).sample()
            # should now be [batch*seq*6, 1]
            out_logmix = tf.cast(out_logmix, tf.int32)
            out_logmix = tf.reshape(out_logmix, [-1])
            # prepare for gather
            out_logmix = tf.stack([tf.range(tf.size(out_logmix)), out_logmix],
                                  axis=-1)

            chosen_mean = tf.gather_nd(out_mean, out_logmix)
            chosen_logstd = tf.gather_nd(out_logstd, out_logmix)

            # sample!!
            rand_gaussian = (tf.random.normal(tf.shape(chosen_mean)) *
                             tf.sqrt(model_hparams.gauss_temperature))
            arguments = chosen_mean + tf.exp(chosen_logstd) * rand_gaussian
            arguments = tf.reshape(arguments, [batch_size, -1, 1, 6])

            # concat with the command we picked!
            ret = tf.concat([command, arguments], axis=-1)

    return ret
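
# --- Minimal eager sketch (toy values) of the gather_nd trick above: after
# sampling one mixture component per row, pair it with the row index so
# tf.gather_nd picks one mean per row.
import tensorflow as tf

out_mean = tf.constant([[0.1, 0.2, 0.3],
                        [1.0, 2.0, 3.0]])          # [rows, num_mix]
sampled_mix = tf.constant([2, 0], dtype=tf.int32)  # chosen component per row
idx = tf.stack([tf.range(tf.size(sampled_mix)), sampled_mix], axis=-1)
print(tf.gather_nd(out_mean, idx).numpy())  # [0.3 1.0]
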
def process_data(object_str, vocab_table):
    """prelinminary process of dialogue data."""
    separated = tf.string_split([object_str]).values
    indices = tf.cast(vocab_table.lookup(separated), tf.int32)
    return indices, tf.size(indices)
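
# --- Standalone TF2-style sketch of the same idea as process_data, using
# tf.strings.split in place of the TF1 tf.string_split and an assumed in-memory
# vocab table (the real vocab_table is built elsewhere); unknown tokens map to
# the table's default value.
import tensorflow as tf

vocab_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(["hello", "world"]),
        values=tf.constant([0, 1], dtype=tf.int64)),
    default_value=tf.constant(2, dtype=tf.int64))

tokens = tf.strings.split(["hello foo world"]).values
indices = tf.cast(vocab_table.lookup(tokens), tf.int32)
print(indices.numpy(), tf.size(indices).numpy())  # [0 2 1] 3
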
Example #25
0
 def num_ignored_columns(self):
     return tf.size(self.ignored_column_indices())
Example #26
0
 def _axis_size(x, axis=None):
     """Get number of elements of `x` in `axis`, as type `x.dtype`."""
     if axis is None:
         return tf.cast(tf.size(x), x.dtype)
     return tf.cast(tf.reduce_prod(tf.gather(tf.shape(x), axis)), x.dtype)
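
# --- Quick eager check (made-up tensor) of _axis_size, assuming the helper
# above is available at module level: the total element count when axis is
# None, otherwise the product of the selected dimensions.
import tensorflow as tf

x = tf.zeros([2, 3, 4])
print(_axis_size(x).numpy())               # 24.0 -- all elements
print(_axis_size(x, axis=[0, 2]).numpy())  # 8.0 -- 2 * 4
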
Example #27
0
    def decode(self, tf_example_string_tensor):
        """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask area in pixels squared.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
        shape [None] indicating if a class is present in the image (1.0) or
        a class is not present in the image (0.0).
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilities.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
    """
        serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))
        is_crowd = fields.InputDataFields.groundtruth_is_crowd
        tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        def default_groundtruth_weights():
            return tf.ones([
                tf.shape(
                    tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
            ],
                           dtype=tf.float32)

        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.shape(tensor_dict[
                    fields.InputDataFields.groundtruth_weights])[0], 0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
            # Set all keypoints that are not labeled to NaN.
            gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
            gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
            visibilities_tiled = tf.tile(
                tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
            tensor_dict[gt_kpt_fld] = tf.where(
                visibilities_tiled, tensor_dict[gt_kpt_fld],
                np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

        if self._expand_hierarchy_labels:
            input_fields = fields.InputDataFields
            image_classes, image_confidences = self._expand_image_label_hierarchy(
                tensor_dict[input_fields.groundtruth_image_classes],
                tensor_dict[input_fields.groundtruth_image_confidences])
            tensor_dict[input_fields.groundtruth_image_classes] = image_classes
            tensor_dict[input_fields.groundtruth_image_confidences] = (
                image_confidences)

            box_fields = [
                fields.InputDataFields.groundtruth_group_of,
                fields.InputDataFields.groundtruth_is_crowd,
                fields.InputDataFields.groundtruth_difficult,
                fields.InputDataFields.groundtruth_area,
                fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_weights,
            ]

            def expand_field(field_name):
                return self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[field_name])

            # pylint: disable=cell-var-from-loop
            for field in box_fields:
                if field in tensor_dict:
                    tensor_dict[field] = tf.cond(
                        tf.size(tensor_dict[field]) > 0,
                        lambda: expand_field(field),
                        lambda: tensor_dict[field])
            # pylint: enable=cell-var-from-loop

            tensor_dict[input_fields.groundtruth_classes] = (
                self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[input_fields.groundtruth_classes], True))

        if fields.InputDataFields.groundtruth_group_of in tensor_dict:
            group_of = fields.InputDataFields.groundtruth_group_of
            tensor_dict[group_of] = tf.cast(tensor_dict[group_of],
                                            dtype=tf.bool)

        if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_num_points],
                    dtype=tf.int32)
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_part_ids],
                    dtype=tf.int32)

        if fields.InputDataFields.groundtruth_track_ids in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_track_ids] = tf.cast(
                    tensor_dict[fields.InputDataFields.groundtruth_track_ids],
                    dtype=tf.int32)

        return tensor_dict
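
# --- Minimal standalone sketch (toy keypoints) of the visibility masking used
# above for groundtruth_keypoints: keypoints whose visibility flag is False are
# replaced with NaN via a tiled boolean mask.
import numpy as np
import tensorflow as tf

keypoints = tf.constant([[[0.1, 0.2], [0.3, 0.4]]])  # [1 box, 2 keypoints, 2]
visibilities = tf.constant([[True, False]])          # [1 box, 2 keypoints]
visibilities_tiled = tf.tile(tf.expand_dims(visibilities, -1), [1, 1, 2])
masked = tf.where(visibilities_tiled, keypoints,
                  np.nan * tf.ones_like(keypoints))
print(masked.numpy())  # the second keypoint becomes [nan, nan]
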
Example #28
0
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            num_classes,
                            min_score_thresh=0.2,
                            max_boxes_to_draw=50,
                            soft_nms_sigma=0.0,
                            iou_threshold=0.5,
                            use_native_nms=False):
    """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a numpy array with shape [N, 1], which has the highest class
      scores on all feature levels. The N is the number of selected
      top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)
    box_outputs: a numpy array with shape [N, 4], which stacks box regression
      outputs on all feature levels. The N is the number of selected top-k
      total anchors on all levels. (k being MAX_DETECTION_POINTS)
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels. The N is the number of selected top-k total anchors on
      all levels.
    indices: a numpy array with shape [N], which is the indices from top-k
      selection.
    classes: a numpy array with shape [N], which represents the class
      prediction on all selected anchors from top-k selection.
    image_id: an integer number to specify the image id.
    image_scale: a float tensor representing the scale between original image
      and input image for the detector. It is used to rescale detections for
      evaluating with the original groundtruth annotations.
    num_classes: an integer that indicates the number of classes.
    min_score_thresh: A float representing the threshold for deciding when to
      remove boxes based on score.
    max_boxes_to_draw: Max number of boxes to draw.
    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter;
      see Bodla et al., https://arxiv.org/abs/1704.04503. When
      `soft_nms_sigma=0.0` (the default), we fall back to standard (hard) NMS.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    use_native_nms: a bool that indicates whether to use native nms.

  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, y, x, height, width, score, class]
  """
    anchor_boxes = tf.gather(anchor_boxes, indices)

    scores = tf.math.sigmoid(cls_outputs)
    # apply bounding box regression to anchors
    boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]),
                                  tf.transpose(anchor_boxes, [1, 0]))

    def _else(detections, class_id, indices):
        """Else branch for generating detections."""
        boxes_cls = tf.gather(boxes, indices)
        scores_cls = tf.gather(scores, indices)
        # Select top-scoring boxes in each class and apply non-maximum suppression
        # (nms) for boxes in the same class. The selected boxes from each class are
        # then concatenated for the final detection outputs.

        if use_native_nms:
            top_detection_idx, scores_cls = tf.image.non_max_suppression_with_scores(
                boxes_cls,
                scores_cls,
                max_boxes_to_draw,
                iou_threshold=iou_threshold,
                score_threshold=min_score_thresh,
                soft_nms_sigma=soft_nms_sigma)
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            boxes_cls = tf.gather(boxes_cls, top_detection_idx)
            top_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
        else:
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            all_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
            top_detection_idx = nms_tf(all_detections_cls, iou_threshold)
            top_detections_cls = tf.gather(all_detections_cls,
                                           top_detection_idx)
        width = top_detections_cls[:, 2] - top_detections_cls[:, 0]
        height = top_detections_cls[:, 3] - top_detections_cls[:, 1]
        top_detections_cls = tf.stack([
            top_detections_cls[:, 1] * image_scale,
            top_detections_cls[:, 0] * image_scale, height * image_scale,
            width * image_scale, top_detections_cls[:, 4]
        ],
                                      axis=-1)

        top_detections_cls = tf.stack([
            tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)),
                    tf.float32), *tf.unstack(top_detections_cls, 5, axis=1),
            tf.repeat(class_id + 1.0, tf.size(top_detection_idx))
        ],
                                      axis=1)

        detections = tf.concat([detections, top_detections_cls], axis=0)

        return detections

    detections = tf.constant([], tf.float32, [0, 7])
    for c in range(num_classes):
        indices_cls = tf.squeeze(tf.where_v2(tf.equal(classes, c)), axis=-1)
        detections = tf.cond(
            tf.equal(tf.size(indices), 0),
            lambda: detections,
            lambda id=c, id_cls=indices_cls: _else(detections, id, id_cls))
    indices_final = tf.argsort(detections[:, -2], direction='DESCENDING')
    detections = tf.gather(detections,
                           indices_final[:max_boxes_to_draw],
                           name='detection')
    return detections
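
# --- Small eager sketch (made-up per-class detections) of how the _else branch
# attaches the image id and class columns with tf.repeat/tf.size before
# concatenating into the running detections tensor.
import tensorflow as tf

image_id = 42
class_id = 3.0
top_detections_cls = tf.constant([[10., 20., 5., 8., 0.9],
                                  [30., 40., 6., 9., 0.7]])  # five columns each
n = tf.size(top_detections_cls[:, 0])
rows = tf.stack([
    tf.cast(tf.repeat(image_id, n), tf.float32),
    *tf.unstack(top_detections_cls, 5, axis=1),
    tf.repeat(class_id + 1.0, n)
], axis=1)
print(rows.numpy())  # each row: image id, the five detection values, class id + 1
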
Example #29
0
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized values and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
           in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
           image that is fed to the network. The tensor is padded with -1 to
           the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']
        if self._visual_feature_distill:
            roi_boxes = data['roi_boxes']
            distill_features = data['groundtruth_visual_features']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._visual_feature_distill:
                assert self._include_mask
                image, boxes, masks, roi_boxes = input_utils.random_horizontal_flip(
                    image, boxes, masks, roi_boxes)
            elif self._include_mask:
                image, boxes, masks = input_utils.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes = input_utils.random_horizontal_flip(image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)
        if self._visual_feature_distill:
            roi_boxes = box_utils.denormalize_boxes(roi_boxes, image_shape)

            # filter out roi boxes smaller than given size
            if self._filter_distill_boxes_size > 0:
                roi_indices = box_utils.get_non_empty_box_indices(
                    roi_boxes, self._filter_distill_boxes_size)
                roi_boxes = tf.gather(roi_boxes, roi_indices)
                distill_features = tf.gather(distill_features, roi_indices)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=input_utils.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  image_info[1, :], offset)
        if self._visual_feature_distill:
            roi_boxes = input_utils.resize_and_crop_boxes(
                roi_boxes, image_scale, image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_utils.normalize_boxes(cropped_boxes,
                                                      image_shape)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size,
                                     (image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(input_anchor,
                                                 self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes': input_anchor.multilevel_boxes,
            'image_info': image_info,
            'rpn_score_targets': rpn_score_targets,
            'rpn_box_targets': rpn_box_targets,
        }
        labels['gt_boxes'] = input_utils.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, -1)
        labels['gt_classes'] = input_utils.clip_or_pad_to_fixed_size(
            classes, self._max_num_instances, -1)
        if self._include_mask:
            labels['gt_masks'] = input_utils.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        if self._visual_feature_distill:
            labels['roi_boxes'] = input_utils.clip_or_pad_to_fixed_size(
                roi_boxes, self._max_num_rois, -1)
            labels['gt_visual_feat'] = input_utils.clip_or_pad_to_fixed_size(
                distill_features, self._max_num_rois, -1)
        return image, labels
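
# --- Hedged, minimal sketch of what a clip_or_pad_to_fixed_size-style helper
# does, guessed only from how it is called above (the real
# input_utils.clip_or_pad_to_fixed_size may differ): truncate to `size` rows or
# pad with a constant so groundtruth tensors get a fixed leading dimension.
import tensorflow as tf

def clip_or_pad_to_fixed_size_sketch(data, size, constant=-1):
    data = data[:size]                   # clip if there are too many rows
    pad_rows = size - tf.shape(data)[0]  # rows still missing
    paddings = [[0, pad_rows]] + [[0, 0]] * (data.shape.rank - 1)
    return tf.pad(data, paddings, constant_values=constant)

boxes = tf.constant([[0.1, 0.2, 0.3, 0.4]])
print(clip_or_pad_to_fixed_size_sketch(boxes, 3).numpy())
# [[ 0.1  0.2  0.3  0.4]
#  [-1.  -1.  -1.  -1. ]
#  [-1.  -1.  -1.  -1. ]]
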
Example #30
0
 def _filter_fn(features):  # pylint: disable=missing-docstring
     return tf.less_equal(
         tf.reduce_max(
             tf.stack([tf.size(v) for v in features.values()], axis=0)),
         max_encoded_len)
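
# --- Quick eager check (toy feature dict) of the filter above: keep an example
# only if its longest feature fits within max_encoded_len.
import tensorflow as tf

max_encoded_len = 4
features = {"inputs": tf.constant([1, 2, 3]), "targets": tf.constant([4, 5])}
keep = tf.less_equal(
    tf.reduce_max(tf.stack([tf.size(v) for v in features.values()], axis=0)),
    max_encoded_len)
print(keep.numpy())  # True -- both features have length <= 4
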