Esempio n. 1
0
  def predict(args, img):
    """Runs the model on one frame, conditioned on the previous step's outputs.

    The previous frame's predictions are concatenated (along axis 0) to the
    reference labels, the previous softmax probabilities are passed as
    `initial_softmax_feedback`, and the previous embeddings are passed together
    with `first_frame_embeddings` as `ref_embeddings`.

    Args:
      args: The previous step's outputs, a tuple of
        (predictions, softmax_probabilities, last_frame_embeddings), where
        predictions is an int tensor of shape [1, h, w],
        softmax_probabilities is a float32 tensor of shape
        [1, h_decoder, w_decoder, n_objects], and last_frame_embeddings is a
        float32 tensor of shape [h_decoder, w_decoder, embedding_dimension].
      img: The image to predict labels for, of shape [h, w, 3].

    Returns:
      A tuple (predictions, softmax_probabilities, embeddings), where
      predictions are the predicted labels cast to int32, of shape [1, h, w],
      softmax_probabilities are the softmax probabilities of shape
      [1, h_decoder, w_decoder, n_objects], and embeddings is the third output
      of `model.predict_labels` (requested via also_return_embeddings=True).
    """
    prev_labels, prev_softmax, prev_embeddings = args

    # Give the previous predictions a trailing axis and stack them after the
    # reference labels.
    combined_ref_labels = tf.concat(
        [reference_labels, prev_labels[..., tf.newaxis]], axis=0)

    # Add a leading axis of size one to the single image.
    batched_img = img[tf.newaxis]

    predict_kwargs = dict(
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        embedding_dimension=FLAGS.embedding_dimension,
        reference_labels=combined_ref_labels,
        k_nearest_neighbors=FLAGS.k_nearest_neighbors,
        use_softmax_feedback=FLAGS.use_softmax_feedback,
        initial_softmax_feedback=prev_softmax,
        embedding_seg_feature_dimension=(
            FLAGS.embedding_seg_feature_dimension),
        embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
        embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
        embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
        also_return_softmax_probabilities=True,
        num_frames_per_video=1,
        normalize_nearest_neighbor_distances=(
            FLAGS.normalize_nearest_neighbor_distances),
        also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame,
        use_local_previous_frame_attention=(
            FLAGS.use_local_previous_frame_attention),
        previous_frame_attention_window_size=(
            FLAGS.previous_frame_attention_window_size),
        use_first_frame_matching=FLAGS.use_first_frame_matching,
        also_return_embeddings=True,
        ref_embeddings=(first_frame_embeddings, prev_embeddings),
    )
    outputs, softmax_probabilities, embeddings = model.predict_labels(
        batched_img, **predict_kwargs)

    labels = tf.cast(outputs[common.OUTPUT_TYPE], tf.int32)
    return labels, softmax_probabilities, embeddings
Esempio n. 2
0
    def predict(args, img):
        """Predicts labels, softmax probabilities and embeddings for one frame.

        Args:
          args: Outputs of the previous step, a tuple of
            (predictions, softmax_probabilities, last_frame_embeddings),
            where predictions is an int tensor of shape [1, h, w],
            softmax_probabilities is a float32 tensor of shape
            [1, h_decoder, w_decoder, n_objects],
            and last_frame_embeddings is a float32 tensor of shape
            [h_decoder, w_decoder, embedding_dimension].
          img: Image to predict labels for, of shape [h, w, 3].

        Returns:
          predictions: The predicted labels, cast to int32, of shape
            [1, h, w].
          softmax_probabilities: The softmax probabilities of shape
            [1, h_decoder, w_decoder, n_objects].
          embeddings: The embeddings output of `model.predict_labels`
            (returned because also_return_embeddings=True is passed).
        """
        previous_labels, previous_softmax, previous_embeddings = args

        # The previous frame's labels get a trailing axis and are appended
        # to the reference labels along axis 0.
        extended_reference_labels = tf.concat(
            [reference_labels, previous_labels[..., tf.newaxis]], axis=0)

        # The first-frame and previous-frame embeddings are handed to the
        # model as a pair.
        reference_embeddings = (first_frame_embeddings, previous_embeddings)

        raw_predictions, softmax_probabilities, embeddings = (
            model.predict_labels(
                img[tf.newaxis],  # Leading axis of size one.
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid,
                embedding_dimension=FLAGS.embedding_dimension,
                reference_labels=extended_reference_labels,
                k_nearest_neighbors=FLAGS.k_nearest_neighbors,
                use_softmax_feedback=FLAGS.use_softmax_feedback,
                initial_softmax_feedback=previous_softmax,
                embedding_seg_feature_dimension=(
                    FLAGS.embedding_seg_feature_dimension),
                embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
                embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
                embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
                also_return_softmax_probabilities=True,
                num_frames_per_video=1,
                normalize_nearest_neighbor_distances=(
                    FLAGS.normalize_nearest_neighbor_distances),
                also_attend_to_previous_frame=(
                    FLAGS.also_attend_to_previous_frame),
                use_local_previous_frame_attention=(
                    FLAGS.use_local_previous_frame_attention),
                previous_frame_attention_window_size=(
                    FLAGS.previous_frame_attention_window_size),
                use_first_frame_matching=FLAGS.use_first_frame_matching,
                also_return_embeddings=True,
                ref_embeddings=reference_embeddings,
            ))

        int_predictions = tf.cast(
            raw_predictions[common.OUTPUT_TYPE], tf.int32)
        return int_predictions, softmax_probabilities, embeddings
Esempio n. 3
0
  def predict(args, imgs):
    """Predicts segmentation labels and softmax probabilities for each image.

    Args:
      args: The previous step's outputs. A tuple of
        (predictions, softmax_probabilities) or, when FLAGS.save_embeddings is
        set, (predictions, softmax_probabilities, embeddings), whose third
        element is ignored here. predictions is an int tensor of shape
        [1, h, w] and softmax_probabilities is a float32 tensor of shape
        [1, h_decoder, w_decoder, n_objects].
      imgs: Either a one-tuple of the image to predict labels for of shape
        [h, w, 3], or pair of previous frame and current frame image.

    Returns:
      predictions: The predicted labels as int32 tensor of shape [1, h, w].
      softmax_probabilities: The softmax probabilities of shape
        [1, h_decoder, w_decoder, n_objects].
      embeddings: Only returned when FLAGS.save_embeddings is set. The first
        output (':0') of the single node in the default graph whose name
        contains 'embeddings'.

    Raises:
      AssertionError: If FLAGS.save_embeddings is set and the default graph
        does not contain exactly one node whose name contains 'embeddings'.
    """
    if FLAGS.save_embeddings:
      last_frame_predictions, last_softmax_probabilities, _ = args
    else:
      last_frame_predictions, last_softmax_probabilities = args

    # Both the reference labels and the number of frames per video depend on
    # whether the previous frame is used, so evaluate the condition once.
    uses_previous_frame = (FLAGS.also_attend_to_previous_frame or
                           FLAGS.use_softmax_feedback)

    if uses_previous_frame:
      # Append the previous frame's predictions (with a trailing axis) to the
      # reference labels.
      ref_labels_to_use = tf.concat(
          [reference_labels, last_frame_predictions[..., tf.newaxis]],
          axis=0)
    else:
      ref_labels_to_use = reference_labels

    predictions, softmax_probabilities = model.predict_labels(
        tf.stack((first_frame_img,) + imgs),
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        embedding_dimension=FLAGS.embedding_dimension,
        reference_labels=ref_labels_to_use,
        k_nearest_neighbors=FLAGS.k_nearest_neighbors,
        use_softmax_feedback=FLAGS.use_softmax_feedback,
        initial_softmax_feedback=last_softmax_probabilities,
        embedding_seg_feature_dimension=
        FLAGS.embedding_seg_feature_dimension,
        embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
        embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
        embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
        also_return_softmax_probabilities=True,
        num_frames_per_video=3 if uses_previous_frame else 2,
        normalize_nearest_neighbor_distances=
        FLAGS.normalize_nearest_neighbor_distances,
        also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame,
        use_local_previous_frame_attention=
        FLAGS.use_local_previous_frame_attention,
        previous_frame_attention_window_size=
        FLAGS.previous_frame_attention_window_size,
        use_first_frame_matching=FLAGS.use_first_frame_matching
    )
    predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32)

    if not FLAGS.save_embeddings:
      return predictions, softmax_probabilities

    # Locate the embeddings tensor by searching the node names of the graph.
    graph = tf.get_default_graph()
    embedding_names = [
        node.name for node in graph.as_graph_def().node
        if 'embeddings' in node.name
    ]
    # This will crash when multi-scale inference is used.
    # Raised explicitly instead of via `assert` so that the check is not
    # stripped under `python -O`; the exception type is unchanged.
    if len(embedding_names) != 1:
      raise AssertionError(
          'Expected exactly one graph node with "embeddings" in its name, '
          'found %d.' % len(embedding_names))
    embeddings = graph.get_tensor_by_name(embedding_names[0] + ':0')
    return predictions, softmax_probabilities, embeddings
Esempio n. 4
0
    def predict(args, imgs):
        """Predicts segmentation labels and softmax probabilities for each image.

        Args:
          args: The previous step's outputs. A tuple of
            (predictions, softmax_probabilities) or, when
            FLAGS.save_embeddings is set,
            (predictions, softmax_probabilities, embeddings), whose third
            element is ignored here. predictions is an int tensor of shape
            [1, h, w] and softmax_probabilities is a float32 tensor of shape
            [1, h_decoder, w_decoder, n_objects].
          imgs: Either a one-tuple of the image to predict labels for of
            shape [h, w, 3], or pair of previous frame and current frame
            image.

        Returns:
          predictions: The predicted labels as int32 tensor of shape
            [1, h, w].
          softmax_probabilities: The softmax probabilities of shape
            [1, h_decoder, w_decoder, n_objects].
          embeddings: Only returned when FLAGS.save_embeddings is set. The
            first output (':0') of the single node in the default graph
            whose name contains 'embeddings'.

        Raises:
          AssertionError: If FLAGS.save_embeddings is set and the default
            graph does not contain exactly one node whose name contains
            'embeddings'.
        """
        if FLAGS.save_embeddings:
            last_frame_predictions, last_softmax_probabilities, _ = args
        else:
            last_frame_predictions, last_softmax_probabilities = args

        # The reference labels and the frame count both depend on whether
        # the previous frame is used, so evaluate the condition once.
        uses_previous_frame = (FLAGS.also_attend_to_previous_frame
                               or FLAGS.use_softmax_feedback)

        if uses_previous_frame:
            # Append the previous frame's predictions (with a trailing axis)
            # to the reference labels.
            ref_labels_to_use = tf.concat(
                [reference_labels, last_frame_predictions[..., tf.newaxis]],
                axis=0)
        else:
            ref_labels_to_use = reference_labels

        predictions, softmax_probabilities = model.predict_labels(
            tf.stack((first_frame_img, ) + imgs),
            model_options=model_options,
            image_pyramid=FLAGS.image_pyramid,
            embedding_dimension=FLAGS.embedding_dimension,
            reference_labels=ref_labels_to_use,
            k_nearest_neighbors=FLAGS.k_nearest_neighbors,
            use_softmax_feedback=FLAGS.use_softmax_feedback,
            initial_softmax_feedback=last_softmax_probabilities,
            embedding_seg_feature_dimension=FLAGS.
            embedding_seg_feature_dimension,
            embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
            embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
            embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
            also_return_softmax_probabilities=True,
            num_frames_per_video=3 if uses_previous_frame else 2,
            normalize_nearest_neighbor_distances=FLAGS.
            normalize_nearest_neighbor_distances,
            also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame,
            use_local_previous_frame_attention=FLAGS.
            use_local_previous_frame_attention,
            previous_frame_attention_window_size=FLAGS.
            previous_frame_attention_window_size,
            use_first_frame_matching=FLAGS.use_first_frame_matching)
        predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32)

        if not FLAGS.save_embeddings:
            return predictions, softmax_probabilities

        # Locate the embeddings tensor by searching the graph's node names.
        graph = tf.get_default_graph()
        embedding_names = [
            node.name for node in graph.as_graph_def().node
            if 'embeddings' in node.name
        ]
        # This will crash when multi-scale inference is used.
        # Raised explicitly instead of via `assert` so that the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if len(embedding_names) != 1:
            raise AssertionError(
                'Expected exactly one graph node with "embeddings" in its '
                'name, found %d.' % len(embedding_names))
        embeddings = graph.get_tensor_by_name(embedding_names[0] + ':0')
        return predictions, softmax_probabilities, embeddings