def create_id3_embedding(videos):
  """Embeds the given videos using the Inflated 3D Convolution network.

  Downloads the graph of the I3D from tf.hub and adds it to the graph on the
  first call.

  Args:
    videos: <float32>[batch_size, num_frames, height=224, width=224, depth=3].
      Expected range is [-1, 1].

  Returns:
    embedding: <float32>[batch_size, embedding_size]. embedding_size depends
               on the model used.

  Raises:
    ValueError: when a provided embedding_layer is not supported.
  """

  batch_size = 16
  module_spec = "https://tfhub.dev/deepmind/i3d-kinetics-400/1"


  # Making sure that we import the graph separately for
  # each different input video tensor.
  module_name = "fvd_kinetics-400_id3_module_" + videos.name.replace(":", "_")

  assert_ops = [
      tf.Assert(
          tf.reduce_max(videos) <= 1.001,
          ["max value in frame is > 1", videos]),
      tf.Assert(
          tf.reduce_min(videos) >= -1.001,
          ["min value in frame is < -1", videos]),
      tf.assert_equal(
          tf.shape(videos)[0],
          batch_size, ["invalid frame batch size: ",
                       tf.shape(videos)],
          summarize=6),
  ]
  with tf.control_dependencies(assert_ops):
    videos = tf.identity(videos)

  module_scope = "%s_apply_default/" % module_name

  # To check whether the module has already been loaded into the graph, we look
  # for a given tensor name. If this tensor name exists, we assume the function
  # has been called before and the graph was imported. Otherwise we import it.
  # Note: in theory, the tensor could exist, but have wrong shapes.
  # This will happen if create_id3_embedding is called with a frames_placeholder
  # of the wrong size/batch size, because even though that will throw a tf.Assert
  # at graph-execution time, it will insert the tensor (with wrong shape) into
  # the graph. This is why we need the following assert.
  video_batch_size = int(videos.shape[0])
  assert video_batch_size in [batch_size, -1, None], "Invalid batch size"
  tensor_name = module_scope + "RGB/inception_i3d/Mean:0"
  if not _is_in_graph(tensor_name):
    i3d_model = hub.Module(module_spec, name=module_name)
    i3d_model(videos)

  # gets the kinetics-i3d-400-logits layer
  tensor_name = module_scope + "RGB/inception_i3d/Mean:0"
  tensor = tf.get_default_graph().get_tensor_by_name(tensor_name)
  return tensor
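A minimal usage sketch (not from the original source), assuming TF1 graph mode, tensorflow_hub installed, and the _is_in_graph helper defined alongside this function; the placeholder name and fake data are illustrative only.

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

# Hypothetical driver: embed a batch of 16 clips of 64 frames each.
videos_placeholder = tf.placeholder(
    tf.float32, [16, 64, 224, 224, 3], name="videos")
embedding = create_id3_embedding(videos_placeholder)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(tf.tables_initializer())
  fake_videos = np.random.uniform(-1.0, 1.0, [16, 64, 224, 224, 3])
  print(sess.run(embedding,
                 feed_dict={videos_placeholder: fake_videos}).shape)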
Example #2
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks, keypoints, or keypoint_heatmaps are not None, the boxlist will
      contain masks, keypoints, or keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window'
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
            'Incorrect scores field length: actual vs expected.', num_scores,
            num_boxes
        ])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        boxes_ids = (range(num_classes)
                     if len(per_class_boxes_list) > 1 else [0] * num_classes)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx], tf.stack([num_scores,
                                                               1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                                   per_class_masks)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)
            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            nms_result.add_field(fields.BoxListFields.classes, (tf.zeros_like(
                nms_result.get_field(fields.BoxListFields.scores)) +
                                                                class_idx))
            selected_boxes_list.append(nms_result)
        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
        return sorted_boxes
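A small hedged example of calling the function above on toy data, assuming the TensorFlow Object Detection API modules it relies on (box_list, box_list_ops, fields) are importable; the boxes, scores, and thresholds are made up.

import tensorflow as tf

# Two detections sharing one box per class (q=1) over three classes.
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]],
                     [[0.1, 0.1, 0.9, 0.9]]], dtype=tf.float32)  # [k=2, q=1, 4]
scores = tf.constant([[0.9, 0.1, 0.0],
                      [0.8, 0.2, 0.0]], dtype=tf.float32)        # [k=2, num_classes=3]

nmsed = multiclass_non_max_suppression(
    boxes, scores, score_thresh=0.5, iou_thresh=0.6,
    max_size_per_class=10, max_total_size=10)

with tf.Session() as sess:
    print(sess.run(nmsed.get()))                                   # surviving boxes
    print(sess.run(nmsed.get_field(fields.BoxListFields.scores)))  # their scores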
Example #3
def build_graph(mode, config, sequence_example_file_paths=None):
    """Builds the TensorFlow graph.

  Args:
    mode: 'train', 'eval', or 'generate'. Only mode related ops are added to
        the graph.
    config: An EventSequenceRnnConfig containing the encoder/decoder and HParams
        to use.
    sequence_example_file_paths: A list of paths to TFRecord files containing
        tf.train.SequenceExample protos. Only needed for training and
        evaluation. May be a sharded file of the form.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate'.
  """
    if mode not in ('train', 'eval', 'generate'):
        raise ValueError("The mode parameter must be 'train', 'eval', "
                         "or 'generate'. The mode parameter was: %s" % mode)

    hparams = config.hparams
    encoder_decoder = config.encoder_decoder

    tf.logging.info('hparams = %s', hparams.values())

    input_size = encoder_decoder.input_size
    num_classes = encoder_decoder.num_classes
    no_event_label = encoder_decoder.default_event_label

    with tf.Graph().as_default() as graph:
        inputs, labels, lengths = None, None, None

        if mode == 'train' or mode == 'eval':
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths, hparams.batch_size, input_size)

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        cell = make_rnn_cell(hparams.rnn_layer_sizes,
                             dropout_keep_prob=(1.0 if mode == 'generate' else
                                                hparams.dropout_keep_prob),
                             attn_length=hparams.attn_length)

        initial_state = cell.zero_state(hparams.batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs,
                                                 initial_state=initial_state,
                                                 swap_memory=True)

        outputs_flat = tf.reshape(outputs, [-1, cell.output_size])
        logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

        if mode == 'train' or mode == 'eval':
            labels_flat = tf.reshape(labels, [-1])
            mask = tf.sequence_mask(lengths)
            if hparams.skip_first_n_losses:
                skip = tf.minimum(lengths, hparams.skip_first_n_losses)
                skip_mask = tf.sequence_mask(skip,
                                             maxlen=tf.reduce_max(lengths))
                mask = tf.logical_and(mask, tf.logical_not(skip_mask))
            mask = tf.cast(mask, tf.float32)
            mask_flat = tf.reshape(mask, [-1])

            num_logits = tf.to_float(tf.reduce_sum(lengths))

            with tf.control_dependencies(
                [tf.Assert(tf.greater(num_logits, 0.), [num_logits])]):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
            loss = tf.reduce_sum(
                mask_flat * softmax_cross_entropy) / num_logits
            perplexity = (
                tf.reduce_sum(mask_flat * tf.exp(softmax_cross_entropy)) /
                num_logits)

            correct_predictions = tf.to_float(
                tf.nn.in_top_k(logits_flat, labels_flat, 1)) * mask_flat
            accuracy = tf.reduce_sum(correct_predictions) / num_logits * 100

            event_positions = (
                tf.to_float(tf.not_equal(labels_flat, no_event_label)) *
                mask_flat)
            event_accuracy = (tf.reduce_sum(
                tf.multiply(correct_predictions, event_positions)) /
                              tf.reduce_sum(event_positions) * 100)

            no_event_positions = (
                tf.to_float(tf.equal(labels_flat, no_event_label)) * mask_flat)
            no_event_accuracy = (tf.reduce_sum(
                tf.multiply(correct_predictions, no_event_positions)) /
                                 tf.reduce_sum(no_event_positions) * 100)

            global_step = tf.Variable(0, trainable=False, name='global_step')

            tf.add_to_collection('loss', loss)
            tf.add_to_collection('perplexity', perplexity)
            tf.add_to_collection('accuracy', accuracy)
            tf.add_to_collection('global_step', global_step)

            summaries = [
                tf.summary.scalar('loss', loss),
                tf.summary.scalar('perplexity', perplexity),
                tf.summary.scalar('accuracy', accuracy),
                tf.summary.scalar('event_accuracy', event_accuracy),
                tf.summary.scalar('no_event_accuracy', no_event_accuracy),
            ]

            if mode == 'train':
                learning_rate = tf.train.exponential_decay(
                    hparams.initial_learning_rate,
                    global_step,
                    hparams.decay_steps,
                    hparams.decay_rate,
                    staircase=True,
                    name='learning_rate')

                opt = tf.train.AdamOptimizer(learning_rate)
                params = tf.trainable_variables()
                gradients = tf.gradients(loss, params)
                clipped_gradients, _ = tf.clip_by_global_norm(
                    gradients, hparams.clip_norm)
                train_op = opt.apply_gradients(zip(clipped_gradients, params),
                                               global_step)
                tf.add_to_collection('learning_rate', learning_rate)
                tf.add_to_collection('train_op', train_op)

                summaries.append(
                    tf.summary.scalar('learning_rate', learning_rate))

            if mode == 'eval':
                summary_op = tf.summary.merge(summaries)
                tf.add_to_collection('summary_op', summary_op)

        elif mode == 'generate':
            temperature = tf.placeholder(tf.float32, [])
            softmax_flat = tf.nn.softmax(
                tf.div(logits_flat, tf.fill([num_classes], temperature)))
            softmax = tf.reshape(softmax_flat,
                                 [hparams.batch_size, -1, num_classes])

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)

    return graph
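A standalone sketch (with made-up lengths) of the loss-masking step inside the train/eval branch above: positions past each sequence length are masked out, and the first skip_first_n_losses steps of every sequence can be masked as well.

import tensorflow as tf

lengths = tf.constant([5, 3])
skip_first_n_losses = 2

mask = tf.sequence_mask(lengths)                        # valid time steps
skip = tf.minimum(lengths, skip_first_n_losses)
skip_mask = tf.sequence_mask(skip, maxlen=tf.reduce_max(lengths))
mask = tf.logical_and(mask, tf.logical_not(skip_mask))  # drop the first 2 steps

with tf.Session() as sess:
    print(sess.run(mask))
    # [[False False  True  True  True]
    #  [False False  True False False]]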
Example #4
def assert_constraints(outputs,
                       monotonicity,
                       output_min,
                       output_max,
                       clamp_min=False,
                       clamp_max=False,
                       debug_tensors=None,
                       eps=1e-6):
    """Asserts that 'outputs' satisfiy constraints.

  Args:
    outputs: Tensor of shape `(num_output_values, units)` which represents
      outputs of pwl calibration layer which will be tested against the given
      constraints. If monotonicity is specified these outputs must be for
      consecutive inputs.
    monotonicity: One of {-1, 0, 1}. -1 for decreasing, 1 for increasing, 0 for
      no monotonicity checks.
    output_min: Lower bound or None.
    output_max: Upper bound or None.
    clamp_min: Whether one of outputs must match output_min.
    clamp_max: Whether one of outputs must match output_max.
    debug_tensors: None or list of anything convertible to tensor (for example
      tensors or strings) which will be printed in case of constraints
      violation.
    eps: Allowed constraints violation.

  Raises:
    ValueError: If monotonicity is not one of {-1, 0, 1}

  Returns:
    List of assertion ops in graph mode, or asserts immediately in eager mode.
  """

    info = ["Outputs: ", outputs, "Epsilon: ", eps]
    if debug_tensors:
        info += debug_tensors
    asserts = []

    if output_min is not None:
        min_output = tf.reduce_min(outputs, axis=0)
        if clamp_min:
            asserts.append(
                tf.Assert(
                    tf.reduce_all(tf.abs(min_output - output_min) <= eps),
                    data=["Clamp_min violation.", "output_min:", output_min] +
                    info,
                    summarize=outputs.shape[0]))
        else:
            asserts.append(
                tf.Assert(
                    tf.reduce_all(min_output >= output_min - eps),
                    data=["Lower bound violation.", "output_min:", output_min
                          ] + info,
                    summarize=outputs.shape[0]))

    if output_max is not None:
        max_output = tf.reduce_max(outputs, axis=0)
        if clamp_max:
            asserts.append(
                tf.Assert(
                    tf.reduce_all(tf.abs(max_output - output_max) <= eps),
                    data=["Clamp_max violation.", "output_max:", output_max] +
                    info,
                    summarize=outputs.shape[0]))
        else:
            asserts.append(
                tf.Assert(
                    tf.reduce_all(max_output <= output_max + eps),
                    data=["Upper bound violation.", "output_max:", output_max
                          ] + info,
                    summarize=outputs.shape[0]))

    if monotonicity not in [-1, 0, 1]:
        raise ValueError(
            "'monotonicity' must be one of: [-1, 0, 1]. It is: %s" %
            monotonicity)
    if monotonicity != 0:
        diffs = (outputs[1:] - outputs[0:-1])
        asserts.append(
            tf.Assert(
                tf.reduce_min(diffs * monotonicity) >= -eps,
                data=[
                    "Monotonicity violation.", "monotonicity:", monotonicity
                ] + info,
                summarize=outputs.shape[0]))

    return asserts
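A hedged usage example of assert_constraints, assuming TF1-style graph execution (a tf.Session): three consecutive outputs of a single unit that should be increasing, clamped to, and bounded by [0, 1]; the values are made up.

import tensorflow as tf

outputs = tf.constant([[0.0], [0.4], [1.0]])  # (num_output_values=3, units=1)
asserts = assert_constraints(outputs,
                             monotonicity=1,
                             output_min=0.0,
                             output_max=1.0,
                             clamp_min=True,
                             clamp_max=True)

with tf.Session() as sess:
    sess.run(asserts)  # raises tf.errors.InvalidArgumentError on a violation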
Example #5
def _random_crop(image_list, crop_height, crop_width):
    """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
    if not image_list:
        raise ValueError('Empty image_list.')

    # Compute the rank assertions.
    rank_assertions = []
    for i in range(len(image_list)):
        image_rank = tf.rank(image_list[i])
        rank_assert = tf.Assert(tf.equal(image_rank, 3), [
            'Wrong rank for tensor  %s [expected] [actual]',
            image_list[i].name, 3, image_rank
        ])
        rank_assertions.append(rank_assert)

    with tf.control_dependencies([rank_assertions[0]]):
        image_shape = tf.shape(image_list[0])
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])

    asserts = [rank_assertions[0], crop_size_assert]

    for i in range(1, len(image_list)):
        image = image_list[i]
        asserts.append(rank_assertions[i])
        with tf.control_dependencies([rank_assertions[i]]):
            shape = tf.shape(image)
        height = shape[0]
        width = shape[1]

        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]', image.name,
            height, image_height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]', image.name, width,
            image_width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former would
    # generate random numbers at graph eval time, unlike the latter which
    # generates random numbers at graph definition time.
    with tf.control_dependencies(asserts):
        max_offset_height = tf.reshape(image_height - crop_height + 1, [])
    with tf.control_dependencies(asserts):
        max_offset_width = tf.reshape(image_width - crop_width + 1, [])
    offset_height = tf.random_uniform([],
                                      maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([],
                                     maxval=max_offset_width,
                                     dtype=tf.int32)

    return [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
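A hedged usage sketch, assuming the companion _crop helper from the same preprocessing module is available: the same random 120x150 window is applied to an image and its label map; the input sizes are made up.

import tensorflow as tf

image = tf.random_uniform([240, 320, 3])
label = tf.random_uniform([240, 320, 1])
cropped_image, cropped_label = _random_crop([image, label], 120, 150)

with tf.Session() as sess:
    img, lab = sess.run([cropped_image, cropped_label])
    print(img.shape, lab.shape)  # (120, 150, 3) (120, 150, 1)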
Example #6
def assert_in_range(x, min_value, max_value):
    return tf.Assert(
        tf.logical_and(tf.greater_equal(tf.reduce_min(x), min_value),
                       tf.less_equal(tf.reduce_max(x), max_value)), [x])
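A hedged usage sketch: tying the range check to downstream ops through control_dependencies so the graph fails fast if x leaves [-1, 1]; the placeholder and fed values are illustrative.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None])
with tf.control_dependencies([assert_in_range(x, -1.0, 1.0)]):
    y = tf.identity(x) * 2.0

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: [0.5, -0.25]}))  # [ 1.  -0.5]
    # feeding [2.0] instead would raise tf.errors.InvalidArgumentError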
Example #7
def _raise():
    tf.Assert(False, [str(e)])
    return ()
Example #8
def f(x):
    for i in tf.range(10):
        tf.print(i)
        tf.Assert(i < 10, ['a'])
        x += x
    return x
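A sketch of how a function like f is typically driven (assuming TF 2.x eager execution with autograph): wrapping it in tf.function turns the Python loop, tf.print, and tf.Assert into graph ops.

import tensorflow as tf

f_graph = tf.function(f)
print(f_graph(tf.constant(1)))  # prints 0..9, then returns 1 * 2**10 = 1024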
Example #9
  def generate_trips(self, min_gap=1, max_gap=5):
    """Generate a tf Dataset of training triplets with an offset between three frames.

    Args:
      min_gap: (int) the minimum offset between two frames of a sampled triplet.
      max_gap: (int) the maximum offset between two frames of a sampled triplet.

    Returns:
      A tf.data.Dataset of ViewSequences without images, consisting of
      triplets from the input sequence separated by the given offset.
    """

    def mapper(timestamp_trips, rgb_trips, pano_trips, depth_trips,
               normal_trips, pose_trips):
      """A function mapping a data tuple to ViewTrip."""
      return ViewTrip(self.scene_id, self.sequence_id, timestamp_trips,
                      rgb_trips, pano_trips, depth_trips, normal_trips,
                      tf.zeros([1]), pose_trips, self.intrinsics[0],
                      self.resolution[0])

    with tf.control_dependencies(
        [tf.Assert(tf.less(max_gap, self.length()),
                   [max_gap, self.length()])]):
      timestamp_trips = []
      rgb_trips = []
      pano_trips = []
      depth_trips = []
      normal_trips = []
      pose_trips = []
      # generate triplets with an offset that ranges
      # from 'min_gap' to 'max_gap'.
      for stride in range(min_gap, max_gap + 1):
        inds = tf.range(stride, self.length() - stride)
        inds_jitter = tf.random.uniform(
            minval=-40,
            maxval=40,
            shape=[self.length() - 2 * stride],
            dtype=tf.int32)
        rand_inds = tf.minimum(
            tf.maximum(inds + inds_jitter, 0),
            self.length() - 1)
        timestamp = tf.stack([
            self.timestamp[:-2 * stride], self.timestamp[2 * stride:],
            self.timestamp[stride:-stride],
            tf.gather(self.timestamp, rand_inds)
        ],
                             axis=1)
        rgb = tf.stack([
            self.rgb[:-2 * stride], self.rgb[2 * stride:],
            self.rgb[stride:-stride],
            tf.gather(self.rgb, rand_inds)
        ],
                       axis=1)
        pano = tf.stack([
            self.pano[:-2 * stride], self.pano[2 * stride:],
            self.pano[stride:-stride],
            tf.gather(self.pano, rand_inds)
        ],
                        axis=1)
        depth = tf.stack([
            self.depth[:-2 * stride], self.depth[2 * stride:],
            self.depth[stride:-stride],
            tf.gather(self.depth, rand_inds)
        ],
                         axis=1)
        normal = tf.stack([
            self.normal[:-2 * stride], self.normal[2 * stride:],
            self.normal[stride:-stride],
            tf.gather(self.normal, rand_inds)
        ],
                          axis=1)
        pose = tf.stack([
            self.pose[:-2 * stride], self.pose[2 * stride:],
            self.pose[stride:-stride],
            tf.gather(self.pose, rand_inds)
        ],
                        axis=1)
        timestamp_trips.append(timestamp)
        rgb_trips.append(rgb)
        pano_trips.append(pano)
        depth_trips.append(depth)
        normal_trips.append(normal)
        pose_trips.append(pose)

      timestamp_trips = tf.concat(timestamp_trips, 0)
      rgb_trips = tf.concat(rgb_trips, 0)
      pano_trips = tf.concat(pano_trips, 0)
      depth_trips = tf.concat(depth_trips, 0)
      normal_trips = tf.concat(normal_trips, 0)
      pose_trips = tf.concat(pose_trips, 0)
      dataset = tf.data.Dataset.from_tensor_slices(
          (timestamp_trips, rgb_trips, pano_trips, depth_trips, normal_trips,
           pose_trips))
      return dataset.map(mapper)
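To make the stacked-slice construction above concrete, here is a standalone sketch on a toy timestamp vector (stride and length are made up): each row pairs a frame with the frame 2*stride later and the midpoint between them.

import tensorflow as tf

timestamps = tf.constant([0, 1, 2, 3, 4, 5])
stride = 2
trips = tf.stack([timestamps[:-2 * stride],   # first frame
                  timestamps[2 * stride:],    # frame 2*stride later
                  timestamps[stride:-stride]  # the midpoint
                 ], axis=1)

with tf.Session() as sess:
    print(sess.run(trips))
    # [[0 4 2]
    #  [1 5 3]]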
Example #10
def train_uvf(train_dir,
              environment=None,
              num_bin_actions=3,
              agent_class=None,
              meta_agent_class=None,
              state_preprocess_class=None,
              inverse_dynamics_class=None,
              exp_action_wrapper=None,
              replay_buffer=None,
              meta_replay_buffer=None,
              replay_num_steps=1,
              meta_replay_num_steps=1,
              critic_optimizer=None,
              actor_optimizer=None,
              meta_critic_optimizer=None,
              meta_actor_optimizer=None,
              repr_optimizer=None,
              relabel_contexts=False,
              meta_relabel_contexts=False,
              batch_size=64,
              repeat_size=0,
              num_episodes_train=2000,
              initial_episodes=2,
              initial_steps=None,
              num_updates_per_observation=1,
              num_collect_per_update=1,
              num_collect_per_meta_update=1,
              gamma=1.0,
              meta_gamma=1.0,
              reward_scale_factor=1.0,
              target_update_period=1,
              should_stop_early=None,
              clip_gradient_norm=0.0,
              summarize_gradients=False,
              debug_summaries=False,
              log_every_n_steps=100,
              prefetch_queue_capacity=2,
              policy_save_dir='policy',
              save_policy_every_n_steps=1000,
              save_policy_interval_secs=0,
              replay_context_ratio=0.0,
              next_state_as_context_ratio=0.0,
              state_index=0,
              zero_timer_ratio=0.0,
              timer_index=-1,
              debug=False,
              max_policies_to_save=None,
              max_steps_per_episode=None,
              load_path=LOAD_PATH):
    """Train an agent."""

    """ Here is a summary of what this method does:
    1) Create the lower level agent, the meta agent, the state preprocessing and the inverse dynamics (what is that?)
    2) Setup automatic summaries of useful things on Tensorboard
    3) Create "Collect experience" for the beginning and the middle parts of training (ops are identical)
    4) Set up the meta-agent (meta) and low-level agent (nometa) ops (in that order)
        1) Get random batch and add it to a queue (why?)
        2) Dequeue a batch from the queue (and optionally repeat it)
        3) Preprocess the state and the next states
        4) If META, sample best meta action (sample) or sample directly from inverse dynamics (FuN) (what is this?)
        5) If META, create state preprocessing training op (based on state preprocessing op) (what's the loss?)
        6) Sample the current and next contexts (optionally use the real ones instead of the adjusted ones)
        7) Compute the context rewards and discounts (if META, using the raw states, else using the processed states)
        8) Multiply the discounts by the learning rate GAMMA
        9) Create the loss function and training operation for the critic and for the actor
    5) Create the low-level training op (NOMETA actor + NOMETA critic + representation op) 
    6) Create the meta-level training op (META actor + META critic op)
    7) Periodically update the targets of the agent and the meta agent (to add stability during training?)
    8) Save the variables of all the agents and meta vars periodically
    9) Define the train_step_fn that can do a single gradient step
    10) Define the global train_ops (train_op, meta_train_op, collect_experience_op) that defines the loss functions 
    
    Note: at the beginning of the training, 
    1) Initialize the local variables
    2) Update the targets of the agent and meta agents (so that the main and target networks are equal)
    3) Setup the first global step (= 0)
    4) Experience is collected during N initial steps    
    
    
    """

    tf_env = create_maze_env.TFPyEnvironment(environment)
    observation_spec = [tf_env.observation_spec()]
    action_spec = [tf_env.action_spec()]

    max_steps_per_episode = max_steps_per_episode or tf_env.pyenv.max_episode_steps

    assert max_steps_per_episode, 'max_steps_per_episode need to be set'

    if initial_steps is None:
        initial_steps = initial_episodes * max_steps_per_episode

    if agent_class.ACTION_TYPE == 'discrete':
        assert False
    else:
        assert agent_class.ACTION_TYPE == 'continuous'

    assert agent_class.ACTION_TYPE == meta_agent_class.ACTION_TYPE
    with tf.variable_scope('meta_agent'):
        meta_agent = meta_agent_class(
            observation_spec,
            action_spec,
            tf_env,
            debug_summaries=debug_summaries)
    meta_agent.set_replay(replay=meta_replay_buffer)

    with tf.variable_scope('uvf_agent'):
        uvf_agent = agent_class(
            observation_spec,
            action_spec,
            tf_env,
            debug_summaries=debug_summaries)
        uvf_agent.set_meta_agent(agent=meta_agent)
        uvf_agent.set_replay(replay=replay_buffer)

    with tf.variable_scope('state_preprocess'):
        state_preprocess = state_preprocess_class()

    with tf.variable_scope('inverse_dynamics'):
        inverse_dynamics = inverse_dynamics_class(
            meta_agent.sub_context_as_action_specs[0])

    # Create counter variables
    global_step = tf.contrib.framework.get_or_create_global_step()
    num_episodes = tf.Variable(0, dtype=tf.int64, name='num_episodes')
    num_resets = tf.Variable(0, dtype=tf.int64, name='num_resets')
    num_updates = tf.Variable(0, dtype=tf.int64, name='num_updates')
    num_meta_updates = tf.Variable(0, dtype=tf.int64, name='num_meta_updates')
    episode_rewards = tf.Variable([0.] * 100, name='episode_rewards')
    episode_meta_rewards = tf.Variable([0.] * 100, name='episode_meta_rewards')

    # Create counter variables summaries
    train_utils.create_counter_summaries([
        ('environment_steps', global_step),
        ('num_episodes', num_episodes),
        ('num_resets', num_resets),
        ('num_updates', num_updates),
        ('num_meta_updates', num_meta_updates),
        ('replay_buffer_adds', replay_buffer.get_num_adds()),
        ('meta_replay_buffer_adds', meta_replay_buffer.get_num_adds()),
    ])

    tf.summary.scalar('avg_episode_rewards',
                      tf.reduce_mean(episode_rewards[1:]))
    tf.summary.scalar('avg_episode_meta_rewards',
                      tf.reduce_mean(episode_meta_rewards[1:]))
    tf.summary.histogram('episode_rewards', episode_rewards[1:])
    tf.summary.histogram('episode_meta_rewards', episode_meta_rewards[1:])

    # Create init ops
    action_fn = uvf_agent.action
    action_fn = uvf_agent.add_noise_fn(action_fn, global_step=None)
    meta_action_fn = meta_agent.action
    meta_action_fn = meta_agent.add_noise_fn(meta_action_fn, global_step=None)
    meta_actions_fn = meta_agent.actions
    meta_actions_fn = meta_agent.add_noise_fn(meta_actions_fn, global_step=None)
    init_collect_experience_op = collect_experience(
        tf_env,
        uvf_agent,
        meta_agent,
        state_preprocess,
        replay_buffer,
        meta_replay_buffer,
        action_fn,
        meta_action_fn,
        environment_steps=global_step,
        num_episodes=num_episodes,
        num_resets=num_resets,
        episode_rewards=episode_rewards,
        episode_meta_rewards=episode_meta_rewards,
        store_context=True,
        disable_agent_reset=False,
    )

    # Create train ops (exactly the same as above, strangely)
    collect_experience_op = collect_experience(
        tf_env,
        uvf_agent,
        meta_agent,
        state_preprocess,
        replay_buffer,
        meta_replay_buffer,
        action_fn,
        meta_action_fn,
        environment_steps=global_step,
        num_episodes=num_episodes,
        num_resets=num_resets,
        episode_rewards=episode_rewards,
        episode_meta_rewards=episode_meta_rewards,
        store_context=True,
        disable_agent_reset=False,
    )

    train_op_list = []
    repr_train_op = tf.constant(0.0)
    for mode in ['meta', 'nometa']:
        if mode == 'meta':
            agent = meta_agent
            buff = meta_replay_buffer
            critic_opt = meta_critic_optimizer
            actor_opt = meta_actor_optimizer
            relabel = meta_relabel_contexts
            num_steps = meta_replay_num_steps
            my_gamma = meta_gamma
            n_updates = num_meta_updates
        else:
            agent = uvf_agent
            buff = replay_buffer
            critic_opt = critic_optimizer
            actor_opt = actor_optimizer
            relabel = relabel_contexts
            num_steps = replay_num_steps
            my_gamma = gamma
            n_updates = num_updates

        with tf.name_scope(mode):
            batch = buff.get_random_batch(batch_size, num_steps=num_steps)
            states, actions, rewards, discounts, next_states = batch[:5]
            with tf.name_scope('Reward'):
                tf.summary.scalar('average_step_reward', tf.reduce_mean(rewards))
            rewards *= reward_scale_factor
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [states, actions, rewards, discounts, next_states] + batch[5:],
                capacity=prefetch_queue_capacity,
                name='batch_queue')

            batch_dequeue = batch_queue.dequeue()
            if repeat_size > 0:
                batch_dequeue = [
                    tf.tile(batch, (repeat_size + 1,) + (1,) * (batch.shape.ndims - 1))
                    for batch in batch_dequeue
                ]
                batch_size *= (repeat_size + 1)
            states, actions, rewards, discounts, next_states = batch_dequeue[:5]
            if mode == 'meta':
                low_states = batch_dequeue[5]
                low_actions = batch_dequeue[6]
                low_state_reprs = state_preprocess(low_states)
            state_reprs = state_preprocess(states)
            next_state_reprs = state_preprocess(next_states)

            if mode == 'meta':  # Re-label meta-action
                prev_actions = actions
                if FLAGS.goal_sample_strategy == 'None':
                    pass
                elif FLAGS.goal_sample_strategy == 'FuN':
                    actions = inverse_dynamics.sample(state_reprs, next_state_reprs, 1, prev_actions, sc=0.1)
                    actions = tf.stop_gradient(actions)
                elif FLAGS.goal_sample_strategy == 'sample':
                    actions = sample_best_meta_actions(state_reprs, next_state_reprs, prev_actions,
                                                       low_states, low_actions, low_state_reprs,
                                                       inverse_dynamics, uvf_agent, k=10)
                else:
                    assert False

            if state_preprocess.trainable and mode == 'meta':
                # Representation learning is based on meta-transitions, but is trained
                # along with low-level policy updates.
                repr_loss, _, _ = state_preprocess.loss(states, next_states, low_actions, low_states)
                repr_train_op = slim.learning.create_train_op(
                    repr_loss,
                    repr_optimizer,
                    global_step=None,
                    update_ops=None,
                    summarize_gradients=summarize_gradients,
                    clip_gradient_norm=clip_gradient_norm,
                    variables_to_train=state_preprocess.get_trainable_vars(), )

            # Get contexts for training
            contexts, next_contexts = agent.sample_contexts(
                mode='train', batch_size=batch_size,
                state=states, next_state=next_states,
            )
            if not relabel:  # Re-label context (in the style of TDM or HER).
                contexts, next_contexts = (
                    batch_dequeue[-2 * len(contexts):-1 * len(contexts)],
                    batch_dequeue[-1 * len(contexts):])

            merged_states = agent.merged_states(states, contexts)
            merged_next_states = agent.merged_states(next_states, next_contexts)
            if mode == 'nometa':
                context_rewards, context_discounts = agent.compute_rewards(
                    'train', state_reprs, actions, rewards, next_state_reprs, contexts)
            elif mode == 'meta':  # Meta-agent uses sum of rewards, not context-specific rewards.
                _, context_discounts = agent.compute_rewards(
                    'train', states, actions, rewards, next_states, contexts)
                context_rewards = rewards

            if agent.gamma_index is not None:
                context_discounts *= tf.cast(
                    tf.reshape(contexts[agent.gamma_index], (-1,)),
                    dtype=context_discounts.dtype)
            else:
                context_discounts *= my_gamma

            critic_loss = agent.critic_loss(merged_states, actions,
                                            context_rewards, context_discounts,
                                            merged_next_states)

            critic_loss = tf.reduce_mean(critic_loss)

            actor_loss = agent.actor_loss(merged_states, actions,
                                          context_rewards, context_discounts,
                                          merged_next_states)
            actor_loss *= tf.to_float(  # Only update actor every N steps.
                tf.equal(n_updates % target_update_period, 0))

            critic_train_op = slim.learning.create_train_op(
                critic_loss,
                critic_opt,
                global_step=n_updates,
                update_ops=None,
                summarize_gradients=summarize_gradients,
                clip_gradient_norm=clip_gradient_norm,
                variables_to_train=agent.get_trainable_critic_vars(), )
            critic_train_op = uvf_utils.tf_print(
                critic_train_op, [critic_train_op],
                message='critic_loss',
                print_freq=1000,
                name='critic_loss')
            train_op_list.append(critic_train_op)
            if actor_loss is not None:
                actor_train_op = slim.learning.create_train_op(
                    actor_loss,
                    actor_opt,
                    global_step=None,
                    update_ops=None,
                    summarize_gradients=summarize_gradients,
                    clip_gradient_norm=clip_gradient_norm,
                    variables_to_train=agent.get_trainable_actor_vars(), )
                actor_train_op = uvf_utils.tf_print(
                    actor_train_op, [actor_train_op],
                    message='actor_loss',
                    print_freq=1000,
                    name='actor_loss')
                train_op_list.append(actor_train_op)

    assert len(train_op_list) == 4
    # Update targets should happen after the networks have been updated.
    # control_dependencies ensures that the ops listed here run before anything created inside the with-statement
    with tf.control_dependencies(train_op_list[2:]):
        update_targets_op = uvf_utils.periodically(
            uvf_agent.update_targets, target_update_period, 'update_targets')
    if meta_agent is not None:
        with tf.control_dependencies(train_op_list[:2]):
            update_meta_targets_op = uvf_utils.periodically(
                meta_agent.update_targets, target_update_period, 'update_targets')

    assert_op = tf.Assert(  # Hack to get training to stop.
        tf.less_equal(global_step, 200 + num_episodes_train * max_steps_per_episode),
        [global_step])
    with tf.control_dependencies([update_targets_op, assert_op]):
        train_op = tf.add_n(train_op_list[2:], name='post_update_targets')
        # Representation training steps on every low-level policy training step.
        train_op += repr_train_op
    with tf.control_dependencies([update_meta_targets_op, assert_op]):
        meta_train_op = tf.add_n(train_op_list[:2],
                                 name='post_update_meta_targets')

    if debug_summaries:
        train_utils.gen_debug_batch_summaries(batch)
        slim.summaries.add_histogram_summaries(
            uvf_agent.get_trainable_critic_vars(), 'critic_vars')
        slim.summaries.add_histogram_summaries(
            uvf_agent.get_trainable_actor_vars(), 'actor_vars')

    train_ops = train_utils.TrainOps(train_op, meta_train_op,
                                     collect_experience_op)

    policy_save_path = os.path.join(train_dir, policy_save_dir, 'model.ckpt')
    policy_vars = uvf_agent.get_actor_vars() + meta_agent.get_actor_vars() + [
        global_step, num_episodes, num_resets
    ] + list(uvf_agent.context_vars) + list(meta_agent.context_vars) + state_preprocess.get_trainable_vars()
    # add critic vars, since some test evaluation depends on them
    policy_vars += uvf_agent.get_trainable_critic_vars() + meta_agent.get_trainable_critic_vars()
    policy_saver = tf.train.Saver(
        policy_vars, max_to_keep=max_policies_to_save, sharded=False)

    lowlevel_vars = (uvf_agent.get_actor_vars() +
                     uvf_agent.get_trainable_critic_vars() +
                     state_preprocess.get_trainable_vars())
    lowlevel_saver = tf.train.Saver(lowlevel_vars)

    def policy_save_fn(sess):
        policy_saver.save(
            sess, policy_save_path, global_step=global_step, write_meta_graph=False)
        if save_policy_interval_secs > 0:
            tf.logging.info(
                'Wait %d secs after save policy.' % save_policy_interval_secs)
            time.sleep(save_policy_interval_secs)

    train_step_fn = train_utils.TrainStep(
        max_number_of_steps=num_episodes_train * max_steps_per_episode + 100,
        num_updates_per_observation=num_updates_per_observation,
        num_collect_per_update=num_collect_per_update,
        num_collect_per_meta_update=num_collect_per_meta_update,
        log_every_n_steps=log_every_n_steps,
        policy_save_fn=policy_save_fn,
        save_policy_every_n_steps=save_policy_every_n_steps,
        should_stop_early=should_stop_early).train_step

    local_init_op = tf.local_variables_initializer()
    init_targets_op = tf.group(uvf_agent.update_targets(1.0),
                               meta_agent.update_targets(1.0))

    def initialize_training_fn(sess):
        """Initialize training function."""
        sess.run(local_init_op)
        sess.run(init_targets_op)
        if load_path:
            tf.logging.info('Restoring low-level from %s' % load_path)
            lowlevel_saver.restore(sess, load_path)
        global_step_value = sess.run(global_step)
        assert global_step_value == 0, 'Global step should be zero.'
        collect_experience_call = sess.make_callable(
            init_collect_experience_op)

        for _ in range(initial_steps):
            collect_experience_call()

    train_saver = tf.train.Saver(max_to_keep=2, sharded=True)
    tf.logging.info('train dir: %s', train_dir)
    return slim.learning.train(
        train_ops,
        train_dir,
        train_step_fn=train_step_fn,
        save_interval_secs=FLAGS.save_interval_secs,
        saver=train_saver,
        log_every_n_steps=0,
        global_step=global_step,
        master="",
        is_chief=(FLAGS.task == 0),
        save_summaries_secs=FLAGS.save_summaries_secs,
        init_fn=initialize_training_fn)
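A standalone sketch (with a made-up step limit) of the "hack to get training to stop" pattern used above: a tf.Assert on the global step is attached as a control dependency of the train op, so the training loop aborts once the limit is exceeded.

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
stop_assert = tf.Assert(tf.less_equal(global_step, 3), [global_step])
with tf.control_dependencies([stop_assert]):
    train_op = tf.assign_add(global_step, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
        try:
            sess.run(train_op)
        except tf.errors.InvalidArgumentError:
            print('step limit reached, stopping')
            break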
Example #11
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                               tf.equal(tf.shape(preprocessed_inputs)[2],
                                        256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be = 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
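A standalone sketch of the dynamic shape check used above: when height and width are unknown at graph-construction time, the 256x256 requirement is deferred to run time via tf.Assert; the placeholder and fed batch are illustrative.

import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, None, None, 3])
shape_assert = tf.Assert(
    tf.logical_and(tf.equal(tf.shape(images)[1], 256),
                   tf.equal(tf.shape(images)[2], 256)),
    ['image size must be 256 in both height and width.'])
with tf.control_dependencies([shape_assert]):
    checked_images = tf.identity(images)

with tf.Session() as sess:
    batch = np.zeros([1, 256, 256, 3], np.float32)
    print(sess.run(checked_images, feed_dict={images: batch}).shape)
    # feeding a 255x256 batch instead would raise InvalidArgumentError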
Example #12
    def VQVAE_layer(self, inputs):
        # Assert last dimension is same as self._embedding_dim
        print("inputs:", inputs)

        input_shape = tf.shape(inputs)
        with tf.control_dependencies([
            tf.Assert(tf.equal(input_shape[-1], self._embedding_dim),
                      [input_shape])]):
            flat_inputs = tf.reshape(inputs, [-1, input_shape[1] * input_shape[2], self._embedding_dim])
            print("flat_inputs:", flat_inputs)

        self.variable_def()  # set all variable
        self.embedding_total_count += 1

        # _w is already quantized: for each row, each idx (latent-variable digit) has its own value to pass; the value of _w is the quantized embedding output

        def dist_fn(tensor_apart):
            a2 = tf.reduce_sum(tensor_apart ** 2, 1, keepdims=True)
            b2 = tf.reduce_sum(self._w ** 2, 0, keepdims=True)
            ab = tf.matmul(tensor_apart, self._w)
            # print("tensor_apart:",tensor_apart)
            # print("self._w:",self._w)
            # print("ab:", ab)
            # print("a2:", a2)
            # print("b2:", b2)
            return a2 - 2 * ab + b2

            # dist = (tf.reduce_sum(tensor_apart ** 2, 1, keepdims=True)
            # - 2 * tf.matmul(tensor_apart, self._w)
            # + tf.reduce_sum(self._w ** 2, 0, keepdims=True))  # different shape: tf.add broadcast
            # return dist

        distances = tf.map_fn(dist_fn, flat_inputs)
        print("distances:", distances)


        #####
        ##### Gradient Based update
        #####
        # # distance.shape = [b,H*W,num_embeddings]
        # encoding_indices = tf.argmin(distances,
        #                              2)  # [b,H*W]
        # encodings = tf.one_hot(encoding_indices, self._num_embeddings)
        # quantized_embd_out = self.quantize(
        #     encoding_indices)  # Actually, this quantized method find the value from corespond econding_idx from w
        # print("quantized_embd_out:", quantized_embd_out)
        # print("inputs:", inputs)
        # print("encoding_indices:", encoding_indices)
        #
        #
        # encoding_indices = tf.expand_dims(encoding_indices, axis=-1)
        #
        #
        # quantized_embd_out = self.quantize(
        #     encoding_indices)  # Actually, this quantized method find the value from corespond econding_idx from w
        # print("quantized_embd_out:", quantized_embd_out)
        # quantized_embd_out = tf.reshape(quantized_embd_out, [tf.shape(inputs)[0],
        #                                                      tf.shape(inputs)[1],
        #                                                      tf.shape(inputs)[2],
        #                                                      quantized_embd_out.get_shape().as_list()[
        #                                                          2]])
        #
        # e_latent_loss = tf.reduce_mean((tf.stop_gradient(quantized_embd_out) - inputs) ** 2)  # embedding loss
        # q_latent_loss = tf.reduce_mean((tf.stop_gradient(inputs) - quantized_embd_out) ** 2)
        # VQ_loss = e_latent_loss + self.commit_loss_coef * q_latent_loss
        #
        # quantized_embd_out = inputs + tf.stop_gradient(
        #     quantized_embd_out - inputs)  # in order to pass value to decoder???
        # assign_moving_avg_op = self.loop_assign_moving_avg(encodings, flat_inputs)
        # temp_decay_op = self.temperature_decay()
        #
        # return {
        #     'quantized_embd_out': quantized_embd_out,
        #     # "quantized_embd_out": non_max_quantized_embd_out,
        #     'VQ_loss': VQ_loss,
        #     # 'encodings': multi_hot_encodings,
        #     'encodings': encodings,
        #     'encoding_indices': encoding_indices,
        #     'assign_moving_avg_op': assign_moving_avg_op,
        #     'temp_decay_op': temp_decay_op}


        # #####
        # ##### EMA Moving average(argmin)
        # #####
        #
        # # distance.shape = [b,H*W,num_embeddings]
        # encoding_indices = tf.argmin(distances,
        #                              2)  # [b,H*W]
        # encodings = tf.one_hot(encoding_indices, self._num_embeddings)
        # quantized_embd_out = self.quantize(
        #     encoding_indices)  # Actually, this quantized method find the value from corespond econding_idx from w
        # print("quantized_embd_out:", quantized_embd_out)
        # print("inputs:", inputs)
        # print("encoding_indices:", encoding_indices)
        #
        #
        # encoding_indices = tf.expand_dims(encoding_indices, axis=-1)
        #
        #
        # quantized_embd_out = self.quantize(
        #     encoding_indices)  # Actually, this quantized method find the value from corespond econding_idx from w
        # print("quantized_embd_out:", quantized_embd_out)
        # quantized_embd_out = tf.reshape(quantized_embd_out, [tf.shape(inputs)[0],
        #                                                      tf.shape(inputs)[1],
        #                                                      tf.shape(inputs)[2],
        #                                                      quantized_embd_out.get_shape().as_list()[
        #                                                          2]])
        #
        # e_latent_loss = tf.reduce_mean((tf.stop_gradient(quantized_embd_out) - inputs) ** 2)  # embedding loss
        # # q_latent_loss = tf.reduce_mean((tf.stop_gradient(inputs) - quantized_embd_out) ** 2)
        # VQ_loss = e_latent_loss
        #
        # quantized_embd_out = inputs + tf.stop_gradient(
        #     quantized_embd_out - inputs)  # in order to pass value to decoder???
        # assign_moving_avg_op = self.loop_assign_moving_avg(encodings, flat_inputs)
        # temp_decay_op = self.temperature_decay()
        #
        # return {
        #     'quantized_embd_out': quantized_embd_out,
        #     # "quantized_embd_out": non_max_quantized_embd_out,
        #     'VQ_loss': VQ_loss,
        #     # 'encodings': multi_hot_encodings,
        #     'encodings': encodings,
        #     'encoding_indices': encoding_indices,
        #     'assign_moving_avg_op': assign_moving_avg_op,
        #     'temp_decay_op': temp_decay_op}




        ####
        #### EMA Moving average(non max)
        ####

        non_max_encoding_indices = self.temperature_sampler(distances, self.sampling_temperature)  # [b,H*W,top_k]
        print("non_max_encoding_indices",non_max_encoding_indices)


        # non_max_encoding_indices = tf.cast(tf.expand_dims(tf.argmin(distances, 2), -1),tf.int32)  # [b,H*W]
        # print("non_max_encoding_indices:",non_max_encoding_indices)



        encoding_indices = tf.expand_dims(tf.argmin(distances, 2), -1)  # [b,H*W]
        print("non_max_encoding_indices(argmax)", encoding_indices)
        same_idx = tf.reduce_sum(
            tf.cast(tf.equal(non_max_encoding_indices,
                             tf.cast(encoding_indices, tf.int32)), tf.float32))



        multi_hot_encodings = tf.map_fn(lambda x: tf.reduce_sum(tf.one_hot(x, self._num_embeddings), axis=-2),
                                        tf.transpose(non_max_encoding_indices, perm=[1, 0, 2]), dtype=tf.float32)
        multi_hot_encodings = tf.transpose(multi_hot_encodings, perm=[1, 0, 2])
        print("multi_hot_encodings:", multi_hot_encodings)

        non_max_quantized_embd_out = self.quantize(non_max_encoding_indices)

        # print("non_max_quantized_embd_out:",    non_max_quantized_embd_out)
        non_max_quantized_embd_out = tf.reshape(non_max_quantized_embd_out, [tf.shape(inputs)[0],
                                                                             tf.shape(inputs)[1],
                                                                             tf.shape(inputs)[2],
                                                                             non_max_quantized_embd_out.get_shape().as_list()[2]])
        # print("non_max_quantized_embd_out:", non_max_quantized_embd_out)


        e_latent_loss = tf.reduce_mean((tf.stop_gradient(non_max_quantized_embd_out) - inputs) ** 2)  # commitment loss (codebook is updated via EMA)
        # q_latent_loss = tf.reduce_mean((tf.stop_gradient(inputs) - non_max_quantized_embd_out) ** 2)
        VQ_loss = e_latent_loss
        non_max_quantized_embd_out = inputs + tf.stop_gradient(
            non_max_quantized_embd_out - inputs)  # straight-through estimator: forward the quantized values, let gradients flow to the encoder
        assign_moving_avg_op = self.loop_assign_moving_avg(multi_hot_encodings, flat_inputs)


        temp_decay_op = self.temperature_decay()

        return {
            # 'quantized_embd_out': quantized_embd_out,
            "quantized_embd_out": non_max_quantized_embd_out,
            'VQ_loss': VQ_loss,
            'encodings': multi_hot_encodings,
            # 'encodings': encodings,
            # 'encoding_indices': encoding_indices,
            'encoding_indices': multi_hot_encodings,
            'assign_moving_avg_op': assign_moving_avg_op,
            'temp_decay_op': temp_decay_op,
            # "top_k_idx":self.top_k_idx.shape
            'top_k_idx': same_idx
        }
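
# The `inputs + tf.stop_gradient(quantized - inputs)` pattern used above is the
# straight-through estimator: the forward pass emits the quantized values while
# the backward pass treats quantization as the identity, so gradients reach the
# encoder. A minimal, self-contained sketch (toy rounding quantizer, TF 1.x
# graph mode assumed, as in the code above):
import tensorflow as tf

x = tf.constant([0.3, 1.7, 2.2])
quantized = tf.round(x)                     # non-differentiable step
st = x + tf.stop_gradient(quantized - x)    # forward: quantized, backward: identity
grad = tf.gradients(tf.reduce_sum(st), x)[0]

with tf.Session() as sess:
    print(sess.run(st))    # [0. 2. 2.]
    print(sess.run(grad))  # [1. 1. 1.] -- gradients pass straight through
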
Example #13
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
    """Pads the given image with the given pad_value.

  Works like tf.image.pad_to_bounding_box, except it can pad the image
  with any given arbitrary pad value and also handle images whose sizes are not
  known during graph construction.

  Args:
    image: 3-D tensor with shape [height, width, channels]
    offset_height: Number of rows of padding to add on top.
    offset_width: Number of columns of padding to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    pad_value: Value to pad the image tensor with.

  Returns:
    3-D tensor of shape [target_height, target_width, channels].

  Raises:
    ValueError: If the shape of image is incompatible with the offset_* or
    target_* arguments.
  """
    with tf.name_scope(None, 'pad_to_bounding_box', [image]):
        image = tf.convert_to_tensor(image, name='image')
        original_dtype = image.dtype
        if original_dtype != tf.float32 and original_dtype != tf.float64:
            # If image dtype is not float, we convert it to int32 to avoid overflow.
            image = tf.cast(image, tf.int32)
        image_rank_assert = tf.Assert(
            tf.logical_or(tf.equal(tf.rank(image), 3),
                          tf.equal(tf.rank(image), 4)),
            ['Wrong image tensor rank.'])
        with tf.control_dependencies([image_rank_assert]):
            image -= pad_value
        image_shape = image.get_shape()
        is_batch = True
        if image_shape.ndims == 3:
            is_batch = False
            image = tf.expand_dims(image, 0)
        elif image_shape.ndims is None:
            is_batch = False
            image = tf.expand_dims(image, 0)
            image.set_shape([None] * 4)
        elif image.get_shape().ndims != 4:
            raise ValueError('Input image must have either 3 or 4 dimensions.')
        _, height, width, _ = _image_dimensions(image, rank=4)
        target_width_assert = tf.Assert(tf.greater_equal(target_width, width),
                                        ['target_width must be >= width'])
        target_height_assert = tf.Assert(
            tf.greater_equal(target_height, height),
            ['target_height must be >= height'])
        with tf.control_dependencies([target_width_assert]):
            after_padding_width = target_width - offset_width - width
        with tf.control_dependencies([target_height_assert]):
            after_padding_height = target_height - offset_height - height
        offset_assert = tf.Assert(
            tf.logical_and(tf.greater_equal(after_padding_width, 0),
                           tf.greater_equal(after_padding_height, 0)),
            ['target size not possible with the given target offsets'])
        batch_params = tf.stack([0, 0])
        height_params = tf.stack([offset_height, after_padding_height])
        width_params = tf.stack([offset_width, after_padding_width])
        channel_params = tf.stack([0, 0])
        with tf.control_dependencies([offset_assert]):
            paddings = tf.stack(
                [batch_params, height_params, width_params, channel_params])
        padded = tf.pad(image, paddings)
        if not is_batch:
            padded = tf.squeeze(padded, axis=[0])
        outputs = padded + pad_value
        if outputs.dtype != original_dtype:
            outputs = tf.cast(outputs, original_dtype)
        return outputs
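
# A brief usage sketch for pad_to_bounding_box above; it assumes the module's
# `_image_dimensions` helper (not shown here) is importable alongside it.
# Unlike tf.image.pad_to_bounding_box, the border is filled with `pad_value`.
import tensorflow as tf

image = tf.random_uniform([100, 150, 3], maxval=255.0, dtype=tf.float32)
padded = pad_to_bounding_box(image, offset_height=20, offset_width=20,
                             target_height=224, target_width=224,
                             pad_value=127.5)
with tf.Session() as sess:
    print(sess.run(tf.shape(padded)))  # [224 224 3]
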
Example #14
def nearest_patch_swapping(content_features, style_features, patch_size=3):
    # the content and style features must have the same number of channels
    c_shape = tf.shape(content_features)
    s_shape = tf.shape(style_features)
    channel_assertion = tf.Assert(tf.equal(c_shape[3], s_shape[3]),
                                  ['number of channels must be the same'])

    with tf.control_dependencies([channel_assertion]):
        # spatial shapes for style and content features
        c_height, c_width, c_channel = c_shape[1], c_shape[2], c_shape[3]

        # convert the style features into convolutional kernels
        style_kernels = tf.extract_image_patches(
            style_features,
            ksizes=[1, patch_size, patch_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding='SAME')
        style_kernels = tf.squeeze(style_kernels, axis=0)
        style_kernels = tf.transpose(style_kernels, perm=[2, 0, 1])

        # gather the conv and deconv kernels
        v_height, v_width = style_kernels.get_shape().as_list()[1:3]
        deconv_kernels = tf.reshape(style_kernels,
                                    shape=(patch_size, patch_size, c_channel,
                                           v_height * v_width))

        kernels_norm = tf.norm(style_kernels, axis=0, keep_dims=True)
        kernels_norm = tf.reshape(kernels_norm,
                                  shape=(1, 1, 1, v_height * v_width))

        # calculate the normalization factor
        mask = tf.ones((c_height, c_width), tf.float32)
        fullmask = tf.zeros(
            (c_height + patch_size - 1, c_width + patch_size - 1), tf.float32)
        for x in range(patch_size):
            for y in range(patch_size):
                paddings = [[x, patch_size - x - 1], [y, patch_size - y - 1]]
                padded_mask = tf.pad(mask, paddings=paddings, mode="CONSTANT")
                fullmask += padded_mask
        pad_width = int((patch_size - 1) / 2)
        deconv_norm = tf.slice(fullmask, [pad_width, pad_width],
                               [c_height, c_width])
        deconv_norm = tf.reshape(deconv_norm, shape=(1, c_height, c_width, 1))

        ########################
        # starting convolution #
        ########################
        # padding operation
        pad_total = patch_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        paddings = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]

        # convolutional operations
        net = tf.pad(content_features, paddings=paddings, mode="REFLECT")
        net = tf.nn.conv2d(net,
                           tf.div(deconv_kernels, kernels_norm + 1e-7),
                           strides=[1, 1, 1, 1],
                           padding='VALID')
        # find the maximum locations
        best_match_ids = tf.argmax(net, axis=3)
        best_match_ids = tf.cast(tf.one_hot(best_match_ids,
                                            depth=v_height * v_width),
                                 dtype=tf.float32)

        # gather the matched style patches and warp them back into the output
        unnormalized_output = tf.nn.conv2d_transpose(
            value=best_match_ids,
            filter=deconv_kernels,
            output_shape=(c_shape[0], c_height + pad_total,
                          c_width + pad_total, c_channel),
            strides=[1, 1, 1, 1],
            padding='VALID')
        unnormalized_output = tf.slice(unnormalized_output,
                                       [0, pad_beg, pad_beg, 0], c_shape)
        output = tf.div(unnormalized_output, deconv_norm)
        output = tf.reshape(output, shape=c_shape)

        # output the swapped feature maps
        return output
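
# An illustrative call to nearest_patch_swapping above (assumed toy shapes):
# every 3x3 patch of the content feature map is replaced by its most similar
# (normalized cross-correlation) patch from the style feature map.
import tensorflow as tf

content_features = tf.random_uniform([1, 32, 32, 64])
style_features = tf.random_uniform([1, 32, 32, 64])
swapped = nearest_patch_swapping(content_features, style_features, patch_size=3)

with tf.Session() as sess:
    print(sess.run(tf.shape(swapped)))  # [ 1 32 32 64]
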
Example #15
def detection_targets_graph2(proposals, gt_class_ids, gt_boxes, config):
    '''
    Generates detection targets for one image. Subsamples proposals and
    generates target class IDs, bounding box deltas, and masks for each.

    Inputs:
    -------
    proposals:          [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates.
                        Might be zero padded if there are not enough proposals.
    gt_class_ids:       [MAX_GT_INSTANCES] int class IDs
    gt_boxes:           [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.

    Returns:            Target ROIs and the corresponding class IDs, bounding box deltas, and GT boxes.
    --------
    rois:               [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    roi_gt_class_ids:   [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    roi_gt_deltas:      [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]. Bbox refinements.
    roi_gt_boxes:       [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]. Assigned GT boxes. Zero padded.

    Note: Returned arrays might be zero padded if not enough target ROIs.

    '''
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]

    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)
    # print('>>> detection_targets_graph ')
    # print('     propsals.shape        :',  proposals.shape, proposals.get_shape(), KB.int_shape(proposals) )
    # print('     gt_boxes.shape        :',  gt_boxes.shape ,    KB.int_shape(gt_boxes)   )
    # print('     gt_class_ids.shape    :',  gt_class_ids.shape, KB.int_shape(gt_class_ids))
    # print('     gt_masks.shape        :',  gt_masks.shape ,    KB.int_shape(gt_masks)   )

    # Remove zero padding
    # non_zeros returns indices of valid bboxes, which we use to index gt_class_ids
    proposals, _ = utils.trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = utils.trim_zeros_graph(gt_boxes,
                                                 name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids,
                                   non_zeros,
                                   name="trim_gt_class_ids")
    # gt_masks            = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,name="trim_gt_masks")

    #------------------------------------------------------------------------------------------
    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    #------------------------------------------------------------------------------------------
    # tf.where : returns the coordinates of true elements of  the specified conditon.
    #            The coordinates are returned in a 2-D tensor where the first dimension (rows)
    #            represents the number of true elements, and the second dimension (columns)
    #            represents the coordinates of the true elements.
    #            Keep in mind, the shape of the output tensor can vary depending on how many
    #            true values there are in input. Indices are output in row-major order.
    #
    # tf.gather: Gather slices from params axis (default = 0) according to indices.
    #            indices must be an integer tensor of any dimension (usually 0-D or 1-D).
    #            Produces an output tensor with shape:
    #                   params.shape[:axis] + indices.shape + params.shape[axis + 1:]
    #
    # tf.squeeze: Removes dimensions of size 1 from the shape of a tensor.
    #            Given a tensor input, this operation returns a tensor of the same type with
    #            all dimensions of size 1 removed. If you don't want to remove all size 1
    #            dimensions, you can remove specific size 1 dimensions by specifying axis.
    #------------------------------------------------------------------------------------------

    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    # crowd_masks     = tf.gather(gt_masks, crowd_ix, axis=2)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
    # gt_masks        = tf.gather(gt_masks, non_crowd_ix, axis=2)

    # Compute overlaps with crowd boxes [anchors, crowds]
    crowd_overlaps = overlaps_graph2(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Compute overlaps matrix [proposals, gt_boxes] - the IoU between
    # proposals and the non-crowd gt boxes (crowd boxes carry a negative class ID in COCO).
    # roi_iou_max is the max IoU of each proposal, taken across axis 1 of the overlaps tensor.
    overlaps = overlaps_graph2(proposals, gt_boxes)
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # print('     overlaps.shape        :',  overlaps.shape, KB.int_shape(overlaps)   )

    ## 1. Determine indices of positive ROI proposal boxes
    #    Identify ROI proposal boxes that have an IoU >= 0.5 overlap with some gt_box, and store
    #    their indices in positive_indices
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]

    ## 2. Determine indices of negative ROI proposal boxes
    #    those with IoU < 0.5 with every GT box and that are not crowd bboxes
    #    tf.where creates an array of shape [# of matches, 1], so we take [:, 0] afterwards
    ## current method
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    ## new method
    # this modification also determines negative ROI proposal boxes, but in addition
    # suppresses the all-zero RoIs from the indices
    # note that   ( negative_bool         = ~positive_roi_bool)
    # negative_nonzero_bool = tf.logical_and(~positive_roi_bool, (roi_iou_max > 0))
    # negative_nonzero_bool = tf.logical_and(negative_nonzero_bool, no_crowd_bool)
    # negative_indices2     = tf.where(negative_nonzero_bool) [:, 0]

    ## 3. Subsample positive ROIs based on ROI_POSITIVE_RATIO
    #    Aim for 33% positive (config.ROI_POSITIVE_RATIO = 0.33)
    #    Positive ROIs   33% of config.TRAIN_ROIS_PER_IMAGE ~  11
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]

    ## 4. Add Negative ROIs. Add enough to maintain positive:negative ratio
    #     negative_count = int((positive_count / config.ROI_POSITIVE_RATIO) - positive_count)
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]

    ## 5.   Gather selected positive and negative ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    ## 6.   Assign positive ROIs to GT boxes.
    #      roi_gt_box_assignment gives, for each positive ROI, the index of the GT box with the maximum overlap
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
    # print('     shape of positive overlaps is :', positive_overlaps.get_shape())

    ## 7.   Compute bbox delta
    # calculate refinement (difference b/w positive rois and gt_boxes) for positive ROIs
    roi_gt_deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    roi_gt_deltas /= config.BBOX_STD_DEV

    ## 8.  prepare gt_masks
    #      transpose gt_masks from [h, w, N] to [N, height, width] and add 4th dim at end [N, height, width, 1]
    #      Pick the right mask for each ROI
    # transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    # roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

    # Compute mask targets
    # boxes = positive_rois

    # if config.USE_MINI_MASK:
    # Transform ROI corrdinates from normalized image space
    # to normalized mini-mask space.
    # y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
    # gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
    # gt_h = gt_y2 - gt_y1
    # gt_w = gt_x2 - gt_x1
    # y1 = (y1 - gt_y1) / gt_h
    # x1 = (x1 - gt_x1) / gt_w
    # y2 = (y2 - gt_y1) / gt_h
    # x2 = (x2 - gt_x1) / gt_w
    # boxes = tf.concat([y1, x1, y2, x2], 1)

    # box_ids = tf.range(0, tf.shape(roi_masks)[0])
    # masks   = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
    # boxes,
    # box_ids,
    # config.MASK_SHAPE)

    # Remove the extra dimension from masks.
    # masks = tf.squeeze(masks, axis=3)

    # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
    # binary cross entropy loss.
    # masks = tf.round(masks)

    # Append negative ROIs and pad bbox roi_gt_deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])

    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    roi_gt_deltas = tf.pad(roi_gt_deltas, [(0, N + P), (0, 0)])
    # masks            = tf.pad(masks           , [[0, N + P], (0, 0), (0, 0)])

    # print(' roi_gt_boxes :  ' , tf.shape(roi_gt_boxes) )
    # print(' P:  ' , P,  ' N :    ', N)
    # print('     roi.shape             :',  rois.shape            , tf.shape(rois))
    # print('     roi_gt_class_ids.shape:',  roi_gt_class_ids.shape, tf.shape(roi_gt_class_ids))
    # print('     roi_gt_deltas.shape   :',  roi_gt_deltas.shape   , tf.shape(roi_gt_deltas))
    # print('     masks.shape           :',  masks.shape           , tf.shape(masks))
    # print('     roi_gt_boxes.shape    :',  roi_gt_boxes.shape    , tf.shape(roi_gt_boxes))

    return rois, roi_gt_class_ids, roi_gt_deltas, roi_gt_boxes
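
# A small numeric sketch of the positive/negative subsampling in steps 3-4
# above, using assumed config values (TRAIN_ROIS_PER_IMAGE = 32,
# ROI_POSITIVE_RATIO = 0.33, roughly matching the "~ 11" note in the comments):
TRAIN_ROIS_PER_IMAGE = 32
ROI_POSITIVE_RATIO = 0.33

positive_cap = int(TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO)  # 10
r = 1.0 / ROI_POSITIVE_RATIO                                   # ~3.03
# Suppose only 7 proposals actually cleared the IoU >= 0.5 threshold.
positive_count = min(positive_cap, 7)                          # 7
negative_count = int(r * positive_count) - positive_count      # 21 - 7 = 14
print(positive_count, negative_count)  # 7 14 -> ~1:2, i.e. ~33% positive
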
Example #16
    def __init__(self, pdfs: List[ZfitPDF], fracs: Optional[ztyping.ParamTypeInput] = None,
                 obs: ztyping.ObsTypeInput = None,
                 name: str = "SumPDF"):
        """Create the sum of the `pdfs` with `fracs` as coefficients.

        Args:
            pdfs (pdf): The pdfs to add.
            fracs (iterable): coefficients for the linear combination of the pdfs. If the pdfs are
                extended, this throws an error.

                  - len(fracs) == len(pdfs) - 1 results in the interpretation of a non-extended pdf.
                    The last coefficient will be equal to 1 - sum(fracs).
                  - len(fracs) == len(pdfs): each pdf in `pdfs` will become an extended pdf with the
                    given yield.
            name (str): Name of the resulting pdf.
        """
        # Check user input, improve TODO
        self._fracs = None

        set_yield_at_end = False
        pdfs = convert_to_container(pdfs)
        self.pdfs = pdfs
        if len(pdfs) < 2:
            raise ValueError("Cannot build a sum of a single pdf")
        if fracs is not None:
            fracs = convert_to_container(fracs)
            fracs = [convert_to_parameter(frac) for frac in fracs]

        # check if all extended
        extended_pdfs = self.pdfs_extended
        implicit = None
        extended = None
        if all(extended_pdfs):
            implicit = True
            extended = True

        # all extended except one -> fraction
        elif sum(extended_pdfs) == len(extended_pdfs) - 1:
            implicit = True
            extended = False

        # no pdf is extended -> using `fracs`
        elif not any(extended_pdfs) and fracs is not None:
            # make extended
            if len(fracs) == len(pdfs):
                implicit = False
                extended = True
            elif len(fracs) == len(pdfs) - 1:
                implicit = False
                extended = False

        # catch if args don't fit known case
        value_error = implicit is None or extended is None
        if (implicit and fracs is not None) or value_error:
            raise ModelIncompatibleError("Wrong arguments. Either"
                                         "\n a) `pdfs` are not extended and `fracs` is given with length pdfs "
                                         "(-> pdfs get extended) or pdfs - 1 (fractions)"
                                         "\n b) all or all except 1 `pdfs` are extended and fracs is None.")

        # create fracs if one is not extended
        if not extended and implicit:
            fracs = []
            not_extended_position = None
            new_pdfs = []
            for i, pdf in enumerate(pdfs):
                if pdf.is_extended:
                    fracs.append(pdf.get_yield())
                    pdf = pdf.copy()
                    pdf._set_yield_inplace(None)  # make non-extended

                else:
                    fracs.append(tf.constant(0., dtype=ztypes.float))
                    not_extended_position = i
                new_pdfs.append(pdf)
            pdfs = new_pdfs
            copied_fracs = fracs.copy()
            remaining_frac_func = lambda: tf.constant(1., dtype=ztypes.float) - tf.add_n(copied_fracs)
            remaining_frac = convert_to_parameter(remaining_frac_func,
                                                  dependents=[convert_to_parameter(f) for f in copied_fracs])
            if run.numeric_checks:
                assert_op = tf.Assert(tf.greater_equal(remaining_frac, tf.constant(0., dtype=ztypes.float)),
                                      data=[remaining_frac])  # check fractions
                deps = [assert_op]
            else:
                deps = []
            fracs[not_extended_position] = remaining_frac
            implicit = False  # now it's explicit

        elif not extended and not implicit:
            # remaining_frac_func = lambda: tf.constant(1., dtype=ztypes.float) - tf.add_n(fracs)
            copied_fracs = fracs.copy()

            def remaining_frac_func():
                return tf.constant(1., dtype=ztypes.float) - tf.add_n(copied_fracs)

            remaining_frac = convert_to_parameter(remaining_frac_func,
                                                  dependents=[convert_to_parameter(f) for f in copied_fracs])
            if run.numeric_checks:
                assert_op = tf.Assert(tf.greater_equal(remaining_frac, tf.constant(0., dtype=ztypes.float)),
                                      data=[remaining_frac])  # check fractions
                deps = [assert_op]
            else:
                deps = []
            fracs.append(remaining_frac)

        # make extended
        elif extended and not implicit:
            yields = fracs
            pdfs = [pdf.create_extended(yield_) for pdf, yield_ in zip(pdfs, yields)]

            implicit = True

        elif extended and implicit:
            yields = [pdf.get_yield() for pdf in pdfs]

        if extended:
            # TODO(Mayou36): convert to correct dtype
            def sum_yields_func():
                return tf.reduce_sum(
                    input_tensor=[tf.convert_to_tensor(value=y, dtype_hint=ztypes.float) for y in yields.copy()])

            sum_yields = convert_to_parameter(sum_yields_func, dependents=yields)
            yield_fracs = [convert_to_parameter(lambda yield_=yield_: yield_ / sum_yields, dependents=yield_)
                           for yield_ in yields]

            self.fracs = yield_fracs
            set_yield_at_end = True
            self._maybe_extended_fracs = [tf.constant(1, dtype=ztypes.float)] * len(self.pdfs)
        else:
            self._maybe_extended_fracs = fracs

        self.pdfs = pdfs

        params = OrderedDict()
        # TODO(Mayou36): this is not right. Where to create the params if extended? The correct fracs?
        for i, frac in enumerate(self._maybe_extended_fracs):
            params['frac_{}'.format(i)] = frac

        super().__init__(pdfs=pdfs, obs=obs, params=params, name=name)
        if set_yield_at_end:
            self._set_yield_inplace(sum_yields)
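
# The implicit "remaining fraction" bookkeeping above (when len(fracs) ==
# len(pdfs) - 1, the last coefficient is 1 - sum(fracs)) can be sketched in
# plain TensorFlow, independently of the zfit parameter machinery (assumed
# toy values):
import tensorflow as tf

fracs = [tf.constant(0.2), tf.constant(0.5)]
remaining_frac = tf.constant(1.0) - tf.add_n(fracs)
# Mirror the run.numeric_checks assert: the remaining fraction must be >= 0.
check = tf.Assert(tf.greater_equal(remaining_frac, 0.0), [remaining_frac])
with tf.control_dependencies([check]):
    remaining_frac = tf.identity(remaining_frac)

with tf.Session() as sess:
    print(sess.run(remaining_frac))  # 0.3
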
Example #17
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks,
                            config):
    """Generates detection targets for one image. Subsamples proposals and
    generates target class IDs, bounding box deltas, and masks for each.
    Inputs:
    proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
    gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.
    Returns: Target ROIs and corresponding class IDs, bounding box shifts,
    and masks.
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
    masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
           boundaries and resized to neural network output size.
    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids,
                                   non_zeros,
                                   name="trim_gt_class_ids")
    gt_masks = tf.gather(gt_masks,
                         tf.where(non_zeros)[:, 0],
                         axis=2,
                         name="trim_gt_masks")

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
    gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [proposals, crowd_boxes]
    crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Assign positive ROIs to GT masks
    # Permute masks to [N, height, width, 1]
    transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    # Pick the right mask for each ROI
    roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

    # Compute mask targets
    boxes = positive_rois
    if config.USE_MINI_MASK:
        # Transform ROI coordinates from normalized image space
        # to normalized mini-mask space.
        y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
        gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        gt_h = gt_y2 - gt_y1
        gt_w = gt_x2 - gt_x1
        y1 = (y1 - gt_y1) / gt_h
        x1 = (x1 - gt_x1) / gt_w
        y2 = (y2 - gt_y1) / gt_h
        x2 = (x2 - gt_x1) / gt_w
        boxes = tf.concat([y1, x1, y2, x2], 1)
    box_ids = tf.range(0, tf.shape(roi_masks)[0])
    masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes,
                                     box_ids, config.MASK_SHAPE)
    # Remove the extra dimension from masks.
    masks = tf.squeeze(masks, axis=3)

    # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
    # binary cross entropy loss.
    masks = tf.round(masks)

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

    return rois, roi_gt_class_ids, deltas, masks
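
# A worked example of the mini-mask coordinate transform above (assumed box
# values): an ROI box is re-expressed in the frame of its assigned GT box so
# that the mask can be cropped from the GT-box-aligned mini mask.
y1, x1, y2, x2 = 0.30, 0.30, 0.50, 0.50              # ROI, normalized image coords
gt_y1, gt_x1, gt_y2, gt_x2 = 0.25, 0.25, 0.55, 0.55  # assigned GT box
gt_h, gt_w = gt_y2 - gt_y1, gt_x2 - gt_x1            # 0.30, 0.30

roi_in_gt_frame = ((y1 - gt_y1) / gt_h, (x1 - gt_x1) / gt_w,
                   (y2 - gt_y1) / gt_h, (x2 - gt_x1) / gt_w)
print(roi_in_gt_frame)  # approx (0.167, 0.167, 0.833, 0.833)
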
Example #18
def _random_crop(image_list, label_list, crop_height, crop_width):
    if not image_list:
        raise ValueError('Empty image_list.')

    # Compute the rank assertions.
    rank_assertions = []
    for i in range(len(image_list)):
        image_rank = tf.rank(image_list[i])
        rank_assert = tf.Assert(tf.equal(image_rank, 3), [
            'Wrong rank for tensor  %s [expected] [actual]',
            image_list[i].name, 3, image_rank
        ])
        rank_assertions.append(rank_assert)

    image_shape = control_flow_ops.with_dependencies([rank_assertions[0]],
                                                     tf.shape(image_list[0]))
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)), [
                           'Crop size greater than the image size.',
                           image_height, image_width, crop_height, crop_width
                       ])

    asserts = [rank_assertions[0], crop_size_assert]

    for i in range(1, len(image_list)):
        image = image_list[i]
        asserts.append(rank_assertions[i])
        shape = control_flow_ops.with_dependencies([rank_assertions[i]],
                                                   tf.shape(image))
        height = shape[0]
        width = shape[1]

        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]', image.name,
            height, image_height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]', image.name, width,
            image_width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former would
    # generate random numbers at graph eval time, unlike the latter which
    # generates random numbers at graph definition time.
    max_offset_height = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_height - crop_height + 1, []))
    max_offset_width = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_width - crop_width + 1, []))
    offset_height = tf.random_uniform([],
                                      maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([],
                                     maxval=max_offset_width,
                                     dtype=tf.int32)

    cropped_images = [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
    cropped_labels = [
        _crop(label, offset_height, offset_width, crop_height, crop_width)
        for label in label_list
    ]
    return cropped_images, cropped_labels
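
# A hypothetical usage sketch for _random_crop above; it assumes the module's
# `_crop` helper and the `control_flow_ops` import are available. The image and
# its label receive the same random offsets.
import tensorflow as tf

image = tf.random_uniform([512, 512, 3])
label = tf.random_uniform([512, 512, 1], maxval=21.0)
cropped_images, cropped_labels = _random_crop([image], [label],
                                              crop_height=321, crop_width=321)
with tf.Session() as sess:
    img, lbl = sess.run([cropped_images[0], cropped_labels[0]])
    print(img.shape, lbl.shape)  # (321, 321, 3) (321, 321, 1)
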
Example #19
def minimize(value_and_gradients_function,
             initial_position,
             tolerance=1e-8,
             x_tolerance=0,
             f_relative_tolerance=0,
             initial_inverse_hessian_estimate=None,
             max_iterations=50,
             parallel_iterations=1,
             name=None):
  """Applies the BFGS algorithm to minimize a differentiable function.

  Performs unconstrained minimization of a differentiable function using the
  BFGS scheme. For details of the algorithm, see [Nocedal and Wright(2006)][1].

  ### Usage:

  The following example demonstrates the BFGS optimizer attempting to find the
  minimum for a simple two dimensional quadratic objective function.

  ```python
    minimum = np.array([1.0, 1.0])  # The center of the quadratic bowl.
    scales = np.array([2.0, 3.0])  # The scales along the two axes.

    # The objective function and the gradient.
    def quadratic(x):
      value = tf.reduce_sum(scales * (x - minimum) ** 2)
      return value, tf.gradients(value, x)[0]

    start = tf.constant([0.6, 0.8])  # Starting point for the search.
    optim_results = tfp.optimizer.bfgs_minimize(
        quadratic, initial_position=start, tolerance=1e-8)

    with tf.Session() as session:
      results = session.run(optim_results)
      # Check that the search converged
      assert(results.converged)
      # Check that the argmin is close to the actual value.
      np.testing.assert_allclose(results.position, minimum)
      # Print out the total number of function evaluations it took. Should be 6.
      print ("Function evaluations: %d" % results.num_objective_evaluations)
  ```

  ### References:
  [1]: Jorge Nocedal, Stephen Wright. Numerical Optimization. Springer Series in
    Operations Research. pp 136-140. 2006
    http://pages.mtu.edu/~struther/Courses/OLD/Sp2013/5630/Jorge_Nocedal_Numerical_optimization_267490.pdf

  Args:
    value_and_gradients_function:  A Python callable that accepts a point as a
      real `Tensor` and returns a tuple of `Tensor`s of real dtype containing
      the value of the function and its gradient at that point. The function
      to be minimized. The first component of the return value should be a
      real scalar `Tensor`. The second component (the gradient) should have the
      same shape as the input value to the function.
    initial_position: `Tensor` of real dtype. The starting point of the search
      procedure. Should be a point at which the function value and the gradient
      norm are finite.
    tolerance: Scalar `Tensor` of real dtype. Specifies the gradient tolerance
      for the procedure. If the supremum norm of the gradient vector is below
      this number, the algorithm is stopped.
    x_tolerance: Scalar `Tensor` of real dtype. If the absolute change in the
      position between one iteration and the next is smaller than this number,
      the algorithm is stopped.
    f_relative_tolerance: Scalar `Tensor` of real dtype. If the relative change
      in the objective value between one iteration and the next is smaller
      than this value, the algorithm is stopped.
    initial_inverse_hessian_estimate: Optional `Tensor` of the same dtype
      as the components of the output of the `value_and_gradients_function`.
      If specified, the shape should be `initial_position.shape` * 2.
      For example, if the shape of `initial_position` is `[n]`, then the
      acceptable shape of `initial_inverse_hessian_estimate` is as a square
      matrix of shape `[n, n]`.
      If the shape of `initial_position` is `[n, m]`, then the required shape
      is `[n, m, n, m]`.
      For the correctness of the algorithm, it is required that this parameter
      be symmetric and positive definite. Specifies the starting estimate for
      the inverse of the Hessian at the initial point. If not specified,
      the identity matrix is used as the starting estimate for the
      inverse Hessian.
    max_iterations: Scalar positive int32 `Tensor`. The maximum number of
      iterations for BFGS updates.
    parallel_iterations: Positive integer. The number of iterations allowed to
      run in parallel.
    name: (Optional) Python str. The name prefixed to the ops created by this
      function. If not supplied, the default name 'minimize' is used.

  Returns:
    optimizer_results: A namedtuple containing the following items:
      converged: Scalar boolean tensor indicating whether the minimum was
        found within tolerance.
      failed:  Scalar boolean tensor indicating whether a line search
        step failed to find a suitable step size satisfying Wolfe
        conditions. In the absence of any constraints on the
        number of objective evaluations permitted, this value will
        be the complement of `converged`. However, if there is
        a constraint and the search stopped due to available
        evaluations being exhausted, both `failed` and `converged`
        will be simultaneously False.
      num_objective_evaluations: The total number of objective
        evaluations performed.
      position: A tensor containing the last argument value found
        during the search. If the search converged, then
        this value is the argmin of the objective function.
      objective_value: A tensor containing the value of the objective
        function at the `position`. If the search converged, then this is
        the (local) minimum of the objective function.
      objective_gradient: A tensor containing the gradient of the objective
        function at the `position`. If the search converged the
        max-norm of this tensor should be below the tolerance.
      inverse_hessian_estimate: A tensor containing the inverse of the
        estimated Hessian.
  """
  with tf.name_scope(name, 'minimize', [initial_position,
                                        tolerance,
                                        initial_inverse_hessian_estimate]):
    initial_position = tf.convert_to_tensor(initial_position,
                                            name='initial_position')
    dtype = initial_position.dtype.base_dtype
    tolerance = tf.convert_to_tensor(tolerance, dtype=dtype,
                                     name='grad_tolerance')
    f_relative_tolerance = tf.convert_to_tensor(f_relative_tolerance,
                                                dtype=dtype,
                                                name='f_relative_tolerance')
    x_tolerance = tf.convert_to_tensor(x_tolerance,
                                       dtype=dtype,
                                       name='x_tolerance')
    max_iterations = tf.convert_to_tensor(max_iterations, name='max_iterations')

    domain_shape = distribution_util.prefer_static_shape(initial_position)

    if initial_inverse_hessian_estimate is None:
      inv_hessian_shape = tf.concat([domain_shape, domain_shape], 0)
      initial_inv_hessian = tf.eye(tf.size(initial_position), dtype=dtype)
      initial_inv_hessian = tf.reshape(initial_inv_hessian,
                                       inv_hessian_shape,
                                       name='initial_inv_hessian')
    else:
      initial_inv_hessian = tf.convert_to_tensor(
          initial_inverse_hessian_estimate,
          dtype=dtype,
          name='initial_inv_hessian')

    # If an initial inverse Hessian is supplied, ensure that it is positive
    # definite. The easiest way to validate this is to compute the Cholesky
    # decomposition. However, it seems that simply adding a control dependency
    # on the decomposition result is not enough to trigger it. We need to
    # add an assert on the result.
    if initial_inverse_hessian_estimate is not None:
      # The supplied Hessian may not be of rank 2. Reshape it so it is.
      initial_inv_hessian_sqr_mat = tf.reshape(
          initial_inverse_hessian_estimate,
          tf.stack([tf.size(initial_position),
                    tf.size(initial_position)], axis=0))
      # If the matrix is not positive definite, the Cholesky decomposition will
      # fail. Adding an assert on it ensures it will be triggered.
      cholesky_factor = tf.cholesky(initial_inv_hessian_sqr_mat)
      is_positive_definite = tf.reduce_all(tf.is_finite(cholesky_factor))
      asymmetry = tf.norm(initial_inv_hessian_sqr_mat -
                          tf.transpose(initial_inv_hessian_sqr_mat), np.inf)
      is_symmetric = tf.equal(asymmetry, 0)
      with tf.control_dependencies(
          [tf.Assert(is_positive_definite,
                     ['Initial inverse Hessian is not positive definite.',
                      initial_inverse_hessian_estimate]),
           tf.Assert(is_symmetric,
                     ['Initial inverse Hessian is not symmetric',
                      initial_inverse_hessian_estimate])]):
        f0, df0 = value_and_gradients_function(initial_position)
    else:
      f0, df0 = value_and_gradients_function(initial_position)

    initial_convergence = _initial_convergence_test(df0, tolerance)

    def _cond(converged,
              failed,
              iteration,
              *ignored_args):  # pylint: disable=unused-argument
      """Stopping condition for the algorithm."""
      keep_going = tf.logical_not(converged | failed |
                                  (iteration >= max_iterations))
      return keep_going

    def _body(converged,  # pylint: disable=unused-argument
              stopped,  # pylint: disable=unused-argument
              iteration,
              total_evals,
              position,
              objective_value,
              objective_gradient,
              input_inv_hessian_estimate):
      """Main optimization loop."""

      search_direction = _get_search_direction(input_inv_hessian_estimate,
                                               objective_gradient)
      derivative_at_start_pt = tf.reduce_sum(objective_gradient *
                                             search_direction)
      # If the derivative at the start point is not negative, reset the
      # Hessian estimate and recompute the search direction.
      needs_reset = derivative_at_start_pt >= 0
      def _reset_search_dirn():
        search_direction = _get_search_direction(initial_inv_hessian,
                                                 objective_gradient)
        return search_direction, initial_inv_hessian

      search_direction, inv_hessian_estimate = tf.contrib.framework.smart_cond(
          needs_reset,
          true_fn=_reset_search_dirn,
          false_fn=lambda: (search_direction, input_inv_hessian_estimate))
      line_search_value_grad_func = _restrict_along_direction(
          value_and_gradients_function, position, search_direction)
      derivative_at_start_pt = tf.reduce_sum(objective_gradient *
                                             search_direction)

      ls_result = linesearch.hager_zhang(
          line_search_value_grad_func,
          initial_step_size=tf.convert_to_tensor(1, dtype=dtype),
          objective_at_zero=objective_value,
          grad_objective_at_zero=derivative_at_start_pt)

      # Fail if the objective value is not finite or the line search failed.
      ls_failed = ~ls_result.converged

      # If the line search failed, then quit at this point.
      def _failed_fn():
        """Line search failed action."""
        failed_retval = BfgsOptimizerResults(
            converged=False,
            failed=True,
            num_iterations=iteration + 1,
            num_objective_evaluations=total_evals + ls_result.func_evals,
            position=position,
            objective_value=objective_value,
            objective_gradient=objective_gradient,
            inverse_hessian_estimate=inv_hessian_estimate)
        return failed_retval

      def _success_fn():
        return _bfgs_update(value_and_gradients_function,
                            position,
                            objective_value,
                            objective_gradient,
                            search_direction,
                            inv_hessian_estimate,
                            ls_result.left_pt,
                            iteration,
                            total_evals + ls_result.func_evals,
                            tolerance,
                            f_relative_tolerance,
                            x_tolerance)

      return tf.contrib.framework.smart_cond(
          ls_failed,
          true_fn=_failed_fn,
          false_fn=_success_fn)

    initial_values = BfgsOptimizerResults(
        converged=initial_convergence,
        failed=False,
        num_iterations=tf.convert_to_tensor(0),
        num_objective_evaluations=1,
        position=initial_position,
        objective_value=f0,
        objective_gradient=df0,
        inverse_hessian_estimate=initial_inv_hessian)

    return tf.while_loop(_cond, _body, initial_values,
                         parallel_iterations=parallel_iterations)
Example #20
def lossfunc(x, alpha, scale, approximate=False, epsilon=1e-6):
    r"""Implements the general form of the loss.

  This implements the rho(x, \alpha, c) function described in "A General and
  Adaptive Robust Loss Function", Jonathan T. Barron,
  https://arxiv.org/abs/1701.03077.

  Args:
    x: The residual for which the loss is being computed. x can have any shape,
      and alpha and scale will be broadcasted to match x's shape if necessary.
      Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the loss (\alpha in the paper), where more
      negative values produce a loss with more robust behavior (outliers "cost"
      less), and more positive values produce a loss with less robust behavior
      (outliers are penalized more heavily). Alpha can be any value in
      [-infinity, infinity], but the gradient of the loss with respect to alpha
      is 0 at -infinity, infinity, 0, and 2. Must be a tensorflow tensor or
      numpy array of floats with the same precision as `x`. Varying alpha allows
      for smooth interpolation between a number of discrete robust losses:
      alpha=-Infinity: Welsch/Leclerc Loss.
      alpha=-2: Geman-McClure loss.
      alpha=0: Cauchy/Lorentzian loss.
      alpha=1: Charbonnier/pseudo-Huber loss.
      alpha=2: L2 loss.
    scale: The scale parameter of the loss. When |x| < scale, the loss is an
      L2-like quadratic bowl, and when |x| > scale the loss function takes on a
      different shape according to alpha. Must be a tensorflow tensor or numpy
      array of single-precision floats.
    approximate: a bool, where if True, this function returns an approximate and
      faster form of the loss, as described in the appendix of the paper. This
      approximation holds well everywhere except as x and alpha approach zero.
    epsilon: A float that determines how inaccurate the "approximate" version of
      the loss will be. Larger values are less accurate but more numerically
      stable. Must be greater than single-precision machine epsilon.

  Returns:
    The losses for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of single precision floats.
  """
    # `scale` and `alpha` must have the same type as `x`.
    float_dtype = x.dtype
    tf.debugging.assert_type(scale, float_dtype)
    tf.debugging.assert_type(alpha, float_dtype)
    # `scale` must be > 0.
    assert_ops = [tf.Assert(tf.reduce_all(tf.greater(scale, 0.)), [scale])]
    with tf.control_dependencies(assert_ops):
        # Broadcast `alpha` and `scale` to have the same shape as `x`.
        alpha = tf.broadcast_to(alpha, tf.shape(x))
        scale = tf.broadcast_to(scale, tf.shape(x))

        if approximate:
            # `epsilon` must be greater than single-precision machine epsilon.
            assert epsilon > np.finfo(np.float32).eps
            # Compute an approximate form of the loss which is faster, but inaccurate
            # when x and alpha are near zero.
            b = tf.abs(alpha - tf.cast(2., float_dtype)) + epsilon
            d = tf.where(tf.greater_equal(alpha, 0.), alpha + epsilon,
                         alpha - epsilon)
            loss = (b / d) * (tf.pow(tf.square(x / scale) / b + 1., 0.5 * d) -
                              1.)
        else:
            # Compute the exact loss.

            # This will be used repeatedly.
            squared_scaled_x = tf.square(x / scale)

            # The loss when alpha == 2.
            loss_two = 0.5 * squared_scaled_x
            # The loss when alpha == 0.
            loss_zero = util.log1p_safe(0.5 * squared_scaled_x)
            # The loss when alpha == -infinity.
            loss_neginf = -tf.math.expm1(-0.5 * squared_scaled_x)
            # The loss when alpha == +infinity.
            loss_posinf = util.expm1_safe(0.5 * squared_scaled_x)

            # The loss when not in one of the above special cases.
            machine_epsilon = tf.cast(np.finfo(np.float32).eps, float_dtype)
            # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by.
            beta_safe = tf.maximum(machine_epsilon, tf.abs(alpha - 2.))
            # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by.
            alpha_safe = tf.where(tf.greater_equal(alpha, 0.),
                                  tf.ones_like(alpha),
                                  -tf.ones_like(alpha)) * tf.maximum(
                                      machine_epsilon, tf.abs(alpha))
            loss_otherwise = (beta_safe / alpha_safe) * (
                tf.pow(squared_scaled_x / beta_safe + 1., 0.5 * alpha) - 1.)

            # Select which of the cases of the loss to return.
            loss = tf.where(
                tf.equal(alpha, -tf.cast(float('inf'), float_dtype)),
                loss_neginf,
                tf.where(
                    tf.equal(alpha, 0.), loss_zero,
                    tf.where(
                        tf.equal(alpha, 2.), loss_two,
                        tf.where(
                            tf.equal(alpha, tf.cast(float('inf'),
                                                    float_dtype)), loss_posinf,
                            loss_otherwise))))

        return loss
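
# A quick sanity check on the special cases listed in the docstring, evaluated
# directly (without calling lossfunc, to avoid its `util` helpers): the general
# form approaches the L2 and Cauchy losses as alpha approaches 2 and 0.
import numpy as np

x = np.linspace(-3.0, 3.0, 7)
scale = 1.0

loss_l2 = 0.5 * (x / scale) ** 2                # alpha == 2
loss_cauchy = np.log1p(0.5 * (x / scale) ** 2)  # alpha == 0 (Cauchy/Lorentzian)

def general_form(x, alpha, c):
    # rho(x, alpha, c) from the paper, valid away from the singular alphas.
    b = abs(alpha - 2.0)
    return (b / alpha) * ((np.square(x / c) / b + 1.0) ** (0.5 * alpha) - 1.0)

print(np.max(np.abs(general_form(x, 1.9999, scale) - loss_l2)))      # small
print(np.max(np.abs(general_form(x, 0.0001, scale) - loss_cauchy)))  # small
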
Example #21
def polymorphic_distribution_fn(example):
    action_inputs = input_fn_and_spec[0](example)
    tf.nest.map_structure(
        lambda spec, t: tf.Assert(spec.is_compatible_with(t[0]), [t]),
        action_fn_input_spec, action_inputs)
    return distribution_fn(*action_inputs)


def MyFn(x):
    with tf.control_dependencies(
            [tf.Assert(tf.less_equal(x, 10.0), [x])]):
        return tf.identity(x)
Example #23
def assert_in_range(x, *, min, max):
    """Asserts that x is in [min, max] elementwise"""
    return tf.Assert(
        tf.logical_and(tf.greater_equal(tf.reduce_min(x), min),
                       tf.less_equal(tf.reduce_max(x), max)), [x])
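
# Usage sketch: attach the range check to the ops that consume x, so the graph
# fails fast if values drift outside [-1, 1] (assumed TF 1.x graph mode).
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None])
with tf.control_dependencies([assert_in_range(x, min=-1.0, max=1.0)]):
    y = tf.identity(x) * 2.0

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: [0.5, -0.25]}))  # [ 1.  -0.5]
    # sess.run(y, feed_dict={x: [1.5]})  # would raise InvalidArgumentError
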
Example #24
def _brent(objective_fn,
           left_bracket,
           right_bracket,
           value_at_left_bracket=None,
           value_at_right_bracket=None,
           absolute_root_tolerance=2e-7,
           relative_root_tolerance=None,
           function_tolerance=2e-7,
           max_iterations=100,
           stopping_policy_fn=None,
           validate_args=False,
           name=None):
  r"""Finds root(s) of a function of a single variable using Brent's method.

  [Brent's method](https://en.wikipedia.org/wiki/Brent%27s_method) is a
  root-finding algorithm combining the bisection method, the secant method and
  inverse quadratic interpolation. Like bisection it is guaranteed to converge
  towards a root if one exists, while its convergence is superlinear, on par
  with less reliable methods.

  This implementation is a translation of the algorithm described in the
  [original article](https://academic.oup.com/comjnl/article/14/4/422/325237).

  Args:
    objective_fn: Python callable for which roots are searched. It must be a
      callable of a single `Tensor` parameter and return a `Tensor` of the same
      shape and dtype as `left_bracket`.
    left_bracket: `Tensor` or Python float representing the first starting
      points. The function will search for roots between each pair of points
      defined by `left_bracket` and `right_bracket`. The shape of `left_bracket`
      should match that of the input to `objective_fn`.
    right_bracket: `Tensor` of the same shape and dtype as `left_bracket` or
      Python float representing the second starting points. The function will
      search for roots between each pair of points defined by `left_bracket` and
      `right_bracket`. This argument must have the same shape as `left_bracket`.
    value_at_left_bracket: Optional `Tensor` or Python float representing the
      value of `objective_fn` at `left_bracket`. If specified, this argument
      must have the same shape as `left_bracket`. If not specified, the value
      will be evaluated during the search.
      Default value: None.
    value_at_right_bracket: Optional `Tensor` or Python float representing the
      value of `objective_fn` at `right_bracket`. If specified, this argument
      must have the same shape as `right_bracket`. If not specified, the value
      will be evaluated during the search.
      Default value: None.
    absolute_root_tolerance: Optional `Tensor` representing the absolute
      tolerance for estimated roots, with the total tolerance being calculated
      as `(absolute_root_tolerance + relative_root_tolerance * |root|) / 2`. If
      specified, this argument must be positive, broadcast with the shape of
      `left_bracket` and have the same dtype.
      Default value: `2e-7`.
    relative_root_tolerance: Optional `Tensor` representing the relative
      tolerance for estimated roots, with the total tolerance being calculated
      as `(absolute_root_tolerance + relative_root_tolerance * |root|) / 2`. If
      specified, this argument must be positive, broadcast with the shape of
      `left_bracket` and have the same dtype.
      Default value: `None` which translates to `4 *
        numpy.finfo(left_bracket.dtype.as_numpy_dtype).eps`.
    function_tolerance: Optional `Tensor` representing the tolerance used to
      check for roots. If the absolute value of `objective_fn` is smaller than
      or equal to `function_tolerance` at a given estimate, then that estimate
      is considered a root for the function. If specified, this argument must
      broadcast with the shape of `left_bracket` and have the same dtype. Set to
      zero to match Brent's original algorithm and to continue the search until
      an exact root is found.
      Default value: `2e-7`.
    max_iterations: Optional `Tensor` of an integral dtype or Python integer
      specifying the maximum number of steps to perform for each initial point.
      Must broadcast with the shape of `left_bracket`. If an element is set to
      zero, the function will not search for any root for the corresponding
      points in `left_bracket` and `right_bracket`. Instead, it will return the
      best estimate from the inputs.
      Default value: `100`.
    stopping_policy_fn: Python `callable` controlling the algorithm termination.
      It must be a callable accepting a `Tensor` of booleans with the shape of
      `left_bracket` (each denoting whether the search is finished for each
      starting point), and returning a scalar boolean `Tensor` (indicating
      whether the overall search should stop). Typical values are
      `tf.reduce_all` (which returns only when the search is finished for all
      pairs of points), and `tf.reduce_any` (which returns as soon as the search
      is finished for any pair of points).
      Default value: `None` which translates to `tf.reduce_all`.
    validate_args: Python `bool` indicating whether to validate arguments such
      as `left_bracket`, `right_bracket`, `absolute_root_tolerance`,
      `relative_root_tolerance`, `function_tolerance`, and `max_iterations`.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.

  Returns:
    brent_results: A Python object containing the following attributes:
      estimated_root: `Tensor` containing the best estimate explored. If the
        search was successful within the specified tolerance, this estimate is
        a root of the objective function.
      objective_at_estimated_root: `Tensor` containing the value of the
        objective function at `estimated_root`. If the search was successful
        within the specified tolerance, then this is close to 0. It has the
        same dtype and shape as `estimated_root`.
      num_iterations: `Tensor` containing the number of iterations performed.
        It has the same dtype as `max_iterations` and shape as `estimated_root`.
      converged: Scalar boolean `Tensor` indicating whether `estimated_root` is
        a root within the tolerance specified for the search. It has the same
        shape as `estimated_root`.

  Raises:
    ValueError: if the `stopping_policy_fn` is not callable.
  """

  with tf.name_scope(name, "brent_root", [
      left_bracket, right_bracket, value_at_left_bracket,
      value_at_right_bracket, max_iterations
  ]):

    state, params, constants = _prepare_brent_args(
        objective_fn, left_bracket, right_bracket, value_at_left_bracket,
        value_at_right_bracket, absolute_root_tolerance,
        relative_root_tolerance, function_tolerance, max_iterations,
        stopping_policy_fn)

    assertions = []
    if validate_args:
      assertions += [
          tf.Assert(
              tf.reduce_all(
                  state.value_at_last_estimate *
                  state.value_at_best_estimate <= constants.zero_value),
              [state.value_at_last_estimate, state.value_at_best_estimate]),
          tf.Assert(
              tf.reduce_all(params.absolute_root_tolerance > constants.zero),
              [params.absolute_root_tolerance]),
          tf.Assert(
              tf.reduce_all(params.relative_root_tolerance > constants.zero),
              [params.relative_root_tolerance]),
          tf.Assert(
              tf.reduce_all(params.function_tolerance >= constants.zero),
              [params.function_tolerance]),
          tf.Assert(
              tf.reduce_all(params.max_iterations >= state.num_iterations),
              [params.max_iterations]),
      ]

    with tf.control_dependencies(assertions):
      result = tf.while_loop(
          # Negate `_should_stop` to determine if the search should continue.
          # This means, in particular, that tf.reduce_*all* will return only
          # when the search is finished for *all* starting points.
          lambda loop_vars: ~_should_stop(loop_vars, params.stopping_policy_fn),
          lambda state: _brent_loop_body(state, params, constants),
          loop_vars=[state])

  state = result[0]
  converged = tf.math.abs(state.value_at_best_estimate) <= function_tolerance

  return BrentResults(
      estimated_root=state.best_estimate,
      objective_at_estimated_root=state.value_at_best_estimate,
      num_iterations=state.num_iterations,
      converged=converged)
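
A minimal usage sketch for _brent, finding the positive root of x^2 - 2 on the bracket [0, 2]. This assumes the helpers referenced above (_prepare_brent_args, _brent_loop_body, _should_stop, BrentResults) are available from the same module, and uses the TF 1.x session API:

import tensorflow as tf

results = _brent(
    objective_fn=lambda x: x * x - 2.0,
    left_bracket=tf.constant([0.0], dtype=tf.float64),
    right_bracket=tf.constant([2.0], dtype=tf.float64),
    validate_args=True)

with tf.Session() as sess:
    root, converged = sess.run([results.estimated_root, results.converged])
# root is approximately [1.41421356]; converged is [True]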
Ejemplo n.º 25
    def _project_distribution(self,
                              supports,
                              weights,
                              target_support,
                              validate_args=False):
        """Projects a batch of (support, weights) onto target_support.
    Based on equation (7) in (Bellemare et al., 2017):
      https://arxiv.org/abs/1707.06887
    In the rest of the comments we will refer to this equation simply as Eq7.
    This code is not easy to digest, so we will use a running example to clarify
    what is going on, with the following sample inputs:
      * supports =       [[0, 2, 4, 6, 8],
                          [1, 3, 4, 5, 6]]
      * weights =        [[0.1, 0.6, 0.1, 0.1, 0.1],
                          [0.1, 0.2, 0.5, 0.1, 0.1]]
      * target_support = [4, 5, 6, 7, 8]
    In the code below, comments preceded with 'Ex:' will be referencing the above
    values.
    Args:
      supports: Tensor of shape (batch_size, num_dims) defining supports for the
        distribution.
      weights: Tensor of shape (batch_size, num_dims) defining weights on the
        original support points. Although for the CategoricalDQN agent these
        weights are probabilities, it is not required that they are.
      target_support: Tensor of shape (num_dims) defining support of the projected
        distribution. The values must be monotonically increasing. Vmin and Vmax
        will be inferred from the first and last elements of this tensor,
        respectively. The values in this tensor must be equally spaced.
      validate_args: Whether we will verify the contents of the
        target_support parameter.
    Returns:
      A Tensor of shape (batch_size, num_dims) with the projection of a batch of
      (support, weights) onto target_support.
    Raises:
      ValueError: If target_support has no dimensions, or if shapes of supports,
        weights, and target_support are incompatible.
    """
        target_support_deltas = target_support[1:] - target_support[:-1]
        # delta_z = `\Delta z` in Eq7.
        delta_z = target_support_deltas[0]
        validate_deps = []
        supports.shape.assert_is_compatible_with(weights.shape)
        supports[0].shape.assert_is_compatible_with(target_support.shape)
        target_support.shape.assert_has_rank(1)
        if validate_args:
            # Assert that supports and weights have the same shapes.
            validate_deps.append(
                tf.Assert(
                    tf.reduce_all(
                        tf.equal(tf.shape(supports), tf.shape(weights))),
                    [supports, weights]))
            # Assert that elements of supports and target_support have the same shape.
            validate_deps.append(
                tf.Assert(
                    tf.reduce_all(
                        tf.equal(
                            tf.shape(supports)[1], tf.shape(target_support))),
                    [supports, target_support]))
            # Assert that target_support has a single dimension.
            validate_deps.append(
                tf.Assert(tf.equal(tf.size(tf.shape(target_support)), 1),
                          [target_support]))
            # Assert that the target_support is monotonically increasing.
            validate_deps.append(
                tf.Assert(tf.reduce_all(target_support_deltas > 0),
                          [target_support]))
            # Assert that the values in target_support are equally spaced.
            validate_deps.append(
                tf.Assert(
                    tf.reduce_all(tf.equal(target_support_deltas, delta_z)),
                    [target_support]))

        with tf.control_dependencies(validate_deps):
            # Ex: `v_min, v_max = 4, 8`.
            v_min, v_max = target_support[0], target_support[-1]
            # Ex: `batch_size = 2`.
            batch_size = tf.shape(supports)[0]
            # `N` in Eq7.
            # Ex: `num_dims = 5`.
            num_dims = tf.shape(target_support)[0]
            # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
            # Ex: `clipped_support = [[[ 4.  4.  4.  6.  8.]]
            #                         [[ 4.  4.  4.  5.  6.]]]`.
            clipped_support = tf.clip_by_value(supports, v_min, v_max)[:,
                                                                       None, :]
            # Ex: `tiled_support = [[[[ 4.  4.  4.  6.  8.]
            #                         [ 4.  4.  4.  6.  8.]
            #                         [ 4.  4.  4.  6.  8.]
            #                         [ 4.  4.  4.  6.  8.]
            #                         [ 4.  4.  4.  6.  8.]]
            #                        [[ 4.  4.  4.  5.  6.]
            #                         [ 4.  4.  4.  5.  6.]
            #                         [ 4.  4.  4.  5.  6.]
            #                         [ 4.  4.  4.  5.  6.]
            #                         [ 4.  4.  4.  5.  6.]]]]`.
            tiled_support = tf.tile([clipped_support], [1, 1, num_dims, 1])
            # Ex: `reshaped_target_support = [[[ 4.]
            #                                  [ 5.]
            #                                  [ 6.]
            #                                  [ 7.]
            #                                  [ 8.]]
            #                                 [[ 4.]
            #                                  [ 5.]
            #                                  [ 6.]
            #                                  [ 7.]
            #                                  [ 8.]]]`.
            reshaped_target_support = tf.tile(target_support[:, None],
                                              [batch_size, 1])
            reshaped_target_support = tf.reshape(reshaped_target_support,
                                                 [batch_size, num_dims, 1])
            # numerator = `|clipped_support - z_i|` in Eq7.
            # Ex: `numerator = [[[[ 0.  0.  0.  2.  4.]
            #                     [ 1.  1.  1.  1.  3.]
            #                     [ 2.  2.  2.  0.  2.]
            #                     [ 3.  3.  3.  1.  1.]
            #                     [ 4.  4.  4.  2.  0.]]
            #                    [[ 0.  0.  0.  1.  2.]
            #                     [ 1.  1.  1.  0.  1.]
            #                     [ 2.  2.  2.  1.  0.]
            #                     [ 3.  3.  3.  2.  1.]
            #                     [ 4.  4.  4.  3.  2.]]]]`.
            numerator = tf.abs(tiled_support - reshaped_target_support)
            quotient = 1 - (numerator / delta_z)
            # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
            # Ex: `clipped_quotient = [[[[ 1.  1.  1.  0.  0.]
            #                            [ 0.  0.  0.  0.  0.]
            #                            [ 0.  0.  0.  1.  0.]
            #                            [ 0.  0.  0.  0.  0.]
            #                            [ 0.  0.  0.  0.  1.]]
            #                           [[ 1.  1.  1.  0.  0.]
            #                            [ 0.  0.  0.  1.  0.]
            #                            [ 0.  0.  0.  0.  1.]
            #                            [ 0.  0.  0.  0.  0.]
            #                            [ 0.  0.  0.  0.  0.]]]]`.
            clipped_quotient = tf.clip_by_value(quotient, 0, 1)
            # Ex: `weights = [[ 0.1  0.6  0.1  0.1  0.1]
            #                 [ 0.1  0.2  0.5  0.1  0.1]]`.
            weights = weights[:, None, :]
            # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))`
            # in Eq7.
            # Ex: `inner_prod = [[[[ 0.1  0.6  0.1  0.  0. ]
            #                      [ 0.   0.   0.   0.  0. ]
            #                      [ 0.   0.   0.   0.1 0. ]
            #                      [ 0.   0.   0.   0.  0. ]
            #                      [ 0.   0.   0.   0.  0.1]]
            #                     [[ 0.1  0.2  0.5  0.  0. ]
            #                      [ 0.   0.   0.   0.1 0. ]
            #                      [ 0.   0.   0.   0.  0.1]
            #                      [ 0.   0.   0.   0.  0. ]
            #                      [ 0.   0.   0.   0.  0. ]]]]`.
            inner_prod = clipped_quotient * weights
            # Ex: `projection = [[ 0.8 0.0 0.1 0.0 0.1]
            #                    [ 0.8 0.1 0.1 0.0 0.0]]`.
            projection = tf.reduce_sum(inner_prod, 3)
            projection = tf.reshape(projection, [batch_size, num_dims])
            return projection
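
The running example from the comments can be reproduced with a few lines of NumPy, which makes Eq7 easy to verify by hand (a standalone check, independent of the TF code above):

import numpy as np

supports = np.array([[0., 2., 4., 6., 8.],
                     [1., 3., 4., 5., 6.]])
weights = np.array([[0.1, 0.6, 0.1, 0.1, 0.1],
                    [0.1, 0.2, 0.5, 0.1, 0.1]])
target_support = np.array([4., 5., 6., 7., 8.])

delta_z = target_support[1] - target_support[0]
clipped = np.clip(supports, target_support[0], target_support[-1])
# |clipped_support - z_i| for every (target atom z_i, original support point) pair.
numerator = np.abs(clipped[:, None, :] - target_support[None, :, None])
clipped_quotient = np.clip(1. - numerator / delta_z, 0., 1.)
projection = (clipped_quotient * weights[:, None, :]).sum(axis=-1)
print(projection)
# [[0.8 0.  0.1 0.  0.1]
#  [0.8 0.1 0.1 0.  0. ]]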
Ejemplo n.º 26
 def __call__(self, t, x):
     with tf.device("/cpu:0"):
         check = tf.Assert(self.ph, [t])
     with tf.control_dependencies([check]):
         y = tf.identity(x)
     return y
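
For context, self.ph here is presumably a boolean tensor created elsewhere in the class (e.g. a placeholder) that gates the computation. Below is a minimal, hypothetical wrapper illustrating the idea; the class name and the placeholder are assumptions, not part of the original source:

import tensorflow as tf

class GuardedIdentity(object):
    """Hypothetical host class: passes x through only while self.ph holds True."""

    def __init__(self):
        # Boolean flag checked by the Assert; defaults to True unless overridden.
        self.ph = tf.placeholder_with_default(True, shape=[])

    def __call__(self, t, x):
        with tf.device("/cpu:0"):
            check = tf.Assert(self.ph, [t])
        with tf.control_dependencies([check]):
            y = tf.identity(x)
        return y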
Ejemplo n.º 27
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, config):
    """Generates detection targets for one image. Subsamples proposals and
    generates target class IDs, bounding box deltas for each.

    Inputs:
    proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.

    Returns: Target ROIs and corresponding class IDs, bounding box shifts
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
            Class-specific bbox refinements.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids,
                                   non_zeros,
                                   name="trim_gt_class_ids")

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)

    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [anchors, crowds]
    crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs
    deltas = KerasRFCN.Utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])

    return rois, roi_gt_class_ids, deltas
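
To make the 33%-positive subsampling above concrete, here is the arithmetic with typical Mask R-CNN defaults; the specific numbers are an assumption for illustration, not taken from this config:

TRAIN_ROIS_PER_IMAGE = 200   # illustrative default
ROI_POSITIVE_RATIO = 0.33    # illustrative default

positive_count = int(TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO)                    # 66
negative_count = int(round(positive_count / ROI_POSITIVE_RATIO)) - positive_count  # 134
print(positive_count, negative_count)  # roughly 1 positive for every 2 negatives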
Ejemplo n.º 28
    def extract_features(self, preprocessed_inputs, init_extraction=False):
        """Extract features from preprocessed inputs.
  
        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.
  
        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        if init_extraction:
            preprocessed_inputs.get_shape().assert_has_rank(4)
            shape_assert = tf.Assert(
                tf.logical_and(
                    tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                    tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
                ['image size must at least be 33 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                with slim.arg_scope(self._conv_hyperparams):
                    with tf.variable_scope('MobilenetV1',
                                           reuse=self._reuse_weights) as scope:
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            preprocessed_inputs,
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)
                        feature_head = image_features['Conv2d_13_pointwise']
                        feature_head = slim.conv2d(
                            feature_head,
                            512, [3, 3],
                            stride=1,
                            padding='SAME',
                            scope='Conv2d_Append_1x1_256')
                        feature_head = tf.nn.avg_pool(
                            feature_head,
                            strides=[1, 1, 1, 1],
                            ksize=[1, 4, 4, 1],
                            padding='VALID',
                        )
                        return feature_head
        else:
            preprocessed_inputs.get_shape().assert_has_rank(4)
            shape_assert = tf.Assert(
                tf.logical_and(
                    tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                    tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
                ['image size must at least be 33 in both height and width.'])

            bottomup_features_names = [
                'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
            ]
            num_appended_layers = 0
            #appended_channel_num = [512, 256, 256, 256]
            appended_channel_num = [512]

            with tf.control_dependencies([shape_assert]):
                with slim.arg_scope(self._conv_hyperparams):
                    with tf.variable_scope('MobilenetV1',
                                           reuse=self._reuse_weights) as scope:
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            preprocessed_inputs,
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

                        topdown_features = self._topdown_feature_maps(
                            image_features,
                            bottomup_features_names=bottomup_features_names,
                            num_appended_layers=num_appended_layers,
                            appended_channel_num=appended_channel_num)
            return topdown_features.values()
Ejemplo n.º 29
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
  with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting). And the score
  of B is set to the average score of boxes in S.

  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.

  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.

  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) if iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    iou_ = iou(selected_boxes, pool_boxes)
    match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
    num_matches = tf.reduce_sum(match_indicator, 1)
    # TODO: Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    match_assert = tf.Assert(tf.reduce_all(tf.greater(num_matches, 0)), [
        'Each box in selected_boxes must match with at least one box '
        'in pool_boxes.'
    ])

    scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_assert = tf.Assert(tf.reduce_all(tf.greater_equal(scores, 0)),
                              ['Scores must be non negative.'])

    with tf.control_dependencies([scores_assert, match_assert]):
        sum_scores = tf.matmul(match_indicator, scores)
    averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

    box_locations = tf.matmul(match_indicator,
                              pool_boxes.get() * scores) / sum_scores
    averaged_boxes = box_list.BoxList(box_locations)
    _copy_extra_fields(averaged_boxes, selected_boxes)
    averaged_boxes.add_field('scores', averaged_scores)
    return averaged_boxes
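
A small NumPy sketch of the voting step for a single selected box, mirroring the weighted-average formulas above; the boxes, scores, and matches are made-up values:

import numpy as np

# Three pool boxes (y1, x1, y2, x2) and their scores; assume all of them
# overlap the selected box with IoU >= iou_thresh.
pool_boxes = np.array([[0.10, 0.10, 0.50, 0.50],
                       [0.12, 0.08, 0.52, 0.48],
                       [0.08, 0.12, 0.48, 0.52]])
scores = np.array([0.9, 0.6, 0.3])
match_indicator = np.array([1., 1., 1.])

sum_scores = match_indicator @ scores                # 1.8
voted_score = sum_scores / match_indicator.sum()     # mean score of the matches: 0.6
voted_box = (match_indicator @ (pool_boxes * scores[:, None])) / sum_scores
# voted_box is the score-weighted average of the matching boxes' coordinates.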
Ejemplo n.º 30
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove the zero padding added earlier
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")
    gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,
                         name="trim_gt_masks")

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
    gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

    # Compute overlaps between proposals and all ground-truth boxes [proposals, gt_boxes]
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [proposals, crowd_boxes]
    crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs: proposals with >= 0.5 IoU with a ground-truth box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs: proposals with < 0.5 IoU with every ground-truth box. Skip crowds.
    negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Balance the numbers of positive and negative samples
    # Keep at most 33% positive samples
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
                         config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Maintain the positive:negative ratio
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
    # Gather the positive and negative ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Get the overlaps between the positive ROIs and the ground-truth boxes
    positive_overlaps = tf.gather(overlaps, positive_indices)
    
    # Check whether there are any ground-truth boxes
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn = lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn = lambda: tf.cast(tf.constant([]),tf.int64)
    )
    # Find the ground-truth box and class assigned to each positive ROI
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute the bbox deltas the network is expected to predict
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Transpose masks to the shape [N, height, width, 1]
    transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
    
    # Pick the mask corresponding to each ROI's assigned ground-truth instance
    roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

    # Compute mask targets
    boxes = positive_rois
    if config.USE_MINI_MASK:
        # Transform ROI coordinates from normalized image space
        # to normalized mini-mask space.
        y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
        gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
        gt_h = gt_y2 - gt_y1
        gt_w = gt_x2 - gt_x1
        y1 = (y1 - gt_y1) / gt_h
        x1 = (x1 - gt_x1) / gt_w
        y2 = (y2 - gt_y1) / gt_h
        x2 = (x2 - gt_x1) / gt_w
        boxes = tf.concat([y1, x1, y2, x2], 1)
    box_ids = tf.range(0, tf.shape(roi_masks)[0])
    masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes,
                                     box_ids,
                                     config.MASK_SHAPE)
    # Remove the extra dimension from masks.
    masks = tf.squeeze(masks, axis=3)

    # Round so the resized masks contain only 0s and 1s
    masks = tf.round(masks)

    # Training normally uses config.TRAIN_ROIS_PER_IMAGE ROIs per image;
    # pad with zeros if there are not enough.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])

    return rois, roi_gt_class_ids, deltas, masks
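
The USE_MINI_MASK branch above rescales each positive ROI from normalized image coordinates into the coordinate frame of its assigned ground-truth box before crop_and_resize. A tiny worked example with illustrative values:

# Ground-truth box and a positive ROI, both (y1, x1, y2, x2) in normalized
# image coordinates (illustrative values chosen to be exact binary fractions).
gt_y1, gt_x1, gt_y2, gt_x2 = 0.25, 0.25, 0.75, 0.75
y1, x1, y2, x2 = 0.375, 0.375, 0.625, 0.625

gt_h, gt_w = gt_y2 - gt_y1, gt_x2 - gt_x1                 # 0.5, 0.5
box = ((y1 - gt_y1) / gt_h, (x1 - gt_x1) / gt_w,
       (y2 - gt_y1) / gt_h, (x2 - gt_x1) / gt_w)
print(box)  # (0.25, 0.25, 0.75, 0.75): the ROI sits in the middle of the GT box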