# Relies on names from the surrounding scope: tf, model, common, FLAGS,
# model_options, reference_labels and first_frame_embeddings.
def predict(args, img):
  """Predicts segmentation labels and softmax probabilities for each image.

  Args:
    args: A tuple of (predictions, softmax_probabilities,
      last_frame_embeddings), where predictions is an int tensor of shape
      [1, h, w], softmax_probabilities is a float32 tensor of shape
      [1, h_decoder, w_decoder, n_objects], and last_frame_embeddings is a
      float32 tensor of shape [h_decoder, w_decoder, embedding_dimension].
    img: Image to predict labels for, of shape [h, w, 3].

  Returns:
    predictions: The predicted labels as an int tensor of shape [1, h, w].
    softmax_probabilities: The softmax probabilities of shape
      [1, h_decoder, w_decoder, n_objects].
    embeddings: The embeddings of the current frame, of shape
      [h_decoder, w_decoder, embedding_dimension].
  """
  (last_frame_predictions, last_softmax_probabilities,
   prev_frame_embeddings) = args
  ref_labels_to_use = tf.concat(
      [reference_labels, last_frame_predictions[..., tf.newaxis]], axis=0)
  predictions, softmax_probabilities, embeddings = model.predict_labels(
      img[tf.newaxis],
      model_options=model_options,
      image_pyramid=FLAGS.image_pyramid,
      embedding_dimension=FLAGS.embedding_dimension,
      reference_labels=ref_labels_to_use,
      k_nearest_neighbors=FLAGS.k_nearest_neighbors,
      use_softmax_feedback=FLAGS.use_softmax_feedback,
      initial_softmax_feedback=last_softmax_probabilities,
      embedding_seg_feature_dimension=FLAGS.embedding_seg_feature_dimension,
      embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
      embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
      embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
      also_return_softmax_probabilities=True,
      num_frames_per_video=1,
      normalize_nearest_neighbor_distances=(
          FLAGS.normalize_nearest_neighbor_distances),
      also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame,
      use_local_previous_frame_attention=(
          FLAGS.use_local_previous_frame_attention),
      previous_frame_attention_window_size=(
          FLAGS.previous_frame_attention_window_size),
      use_first_frame_matching=FLAGS.use_first_frame_matching,
      also_return_embeddings=True,
      ref_embeddings=(first_frame_embeddings, prev_frame_embeddings))
  predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32)
  return predictions, softmax_probabilities, embeddings
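# A minimal driving sketch (an assumption, not part of the original script):
# the (args, img) signature of predict() matches the callback contract of
# tf.scan, which threads the previous step's outputs into the next call.
# `video_frames` and the init_* tensors below are hypothetical names for the
# frame stack ([n_frames, h, w, 3]) and the first-frame outputs, which the
# surrounding scope would have to provide.
all_predictions, all_softmax, all_embeddings = tf.scan(
    predict,
    video_frames,  # float32, [n_frames, h, w, 3]
    initializer=(init_predictions,   # int32, [1, h, w]
                 init_softmax,       # float32, [1, h_dec, w_dec, n_objects]
                 init_embeddings),   # float32, [h_dec, w_dec, emb_dim]
    parallel_iterations=1,           # frames must be processed in order
    swap_memory=True)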
def predict(args, imgs):
  """Predicts segmentation labels and softmax probabilities for each image.

  Args:
    args: A tuple of (predictions, softmax_probabilities), where predictions
      is an int tensor of shape [1, h, w] and softmax_probabilities is a
      float32 tensor of shape [1, h_decoder, w_decoder, n_objects]. If
      FLAGS.save_embeddings is set, args carries the last frame embeddings
      as an additional third element.
    imgs: Either a one-tuple of the image to predict labels for, of shape
      [h, w, 3], or a pair of the previous frame and the current frame
      images.

  Returns:
    predictions: The predicted labels as an int tensor of shape [1, h, w].
    softmax_probabilities: The softmax probabilities of shape
      [1, h_decoder, w_decoder, n_objects].
    embeddings: The embeddings tensor of the current frame. Only returned
      if FLAGS.save_embeddings is set.
  """
  if FLAGS.save_embeddings:
    last_frame_predictions, last_softmax_probabilities, _ = args
  else:
    last_frame_predictions, last_softmax_probabilities = args
  if FLAGS.also_attend_to_previous_frame or FLAGS.use_softmax_feedback:
    ref_labels_to_use = tf.concat(
        [reference_labels, last_frame_predictions[..., tf.newaxis]], axis=0)
  else:
    ref_labels_to_use = reference_labels
  predictions, softmax_probabilities = model.predict_labels(
      tf.stack((first_frame_img,) + imgs),
      model_options=model_options,
      image_pyramid=FLAGS.image_pyramid,
      embedding_dimension=FLAGS.embedding_dimension,
      reference_labels=ref_labels_to_use,
      k_nearest_neighbors=FLAGS.k_nearest_neighbors,
      use_softmax_feedback=FLAGS.use_softmax_feedback,
      initial_softmax_feedback=last_softmax_probabilities,
      embedding_seg_feature_dimension=FLAGS.embedding_seg_feature_dimension,
      embedding_seg_n_layers=FLAGS.embedding_seg_n_layers,
      embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size,
      embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates,
      also_return_softmax_probabilities=True,
      num_frames_per_video=(3 if FLAGS.also_attend_to_previous_frame or
                            FLAGS.use_softmax_feedback else 2),
      normalize_nearest_neighbor_distances=(
          FLAGS.normalize_nearest_neighbor_distances),
      also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame,
      use_local_previous_frame_attention=(
          FLAGS.use_local_previous_frame_attention),
      previous_frame_attention_window_size=(
          FLAGS.previous_frame_attention_window_size),
      use_first_frame_matching=FLAGS.use_first_frame_matching)
  predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32)
  if FLAGS.save_embeddings:
    # Recover the embeddings tensor by name from the graph. The assert will
    # fail when multi-scale inference is used, since the graph then contains
    # more than one embeddings node.
    names = [n.name for n in tf.get_default_graph().as_graph_def().node]
    embedding_names = [x for x in names if 'embeddings' in x]
    assert len(embedding_names) == 1, len(embedding_names)
    embedding_name = embedding_names[0] + ':0'
    embeddings = tf.get_default_graph().get_tensor_by_name(embedding_name)
    return predictions, softmax_probabilities, embeddings
  else:
    return predictions, softmax_probabilities
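# The same driving sketch for this variant (again hypothetical names, an
# assumption rather than the original wiring): tf.scan's elems is a tuple
# here, so predict() receives a 1- or 2-tuple of per-frame images, matching
# num_frames_per_video of 2 or 3 once the first frame is stacked on.
# init_predictions / init_softmax / init_embeddings are assumed first-frame
# outputs provided by the surrounding scope.
if FLAGS.also_attend_to_previous_frame or FLAGS.use_softmax_feedback:
  elems = (video_frames[:-1], video_frames[1:])  # (previous, current) pairs
else:
  elems = (video_frames[1:],)  # current frame only
init = (init_predictions, init_softmax)
if FLAGS.save_embeddings:
  # predict() ignores this input slot, but tf.scan requires its shape to
  # match the embeddings tensor returned for each frame.
  init += (init_embeddings,)
results = tf.scan(predict, elems, initializer=init, parallel_iterations=1)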