def sample_from_instances(inputs, outputs, num_samples):
  """Draws the same number of points from every instance and gathers fields.

  The instance-id image found in `inputs` is flattened and relabelled with
  `isu.map_labels_to_0_to_n`. `num_samples` point indices are then selected
  per instance (the ignore label is excluded) and used to gather every image
  field that is present in `inputs` and `outputs`.

  Note that if the instance-id field is itself listed by
  `standard_fields.get_input_image_fields()`, the relabelled ids stored first
  are replaced by a gather from the original `inputs` entry.

  Args:
    inputs: Dictionary of input tensors keyed by `standard_fields` names. Must
      contain `InputDataFields.object_instance_id_image`.
    outputs: Dictionary of output tensors keyed by `standard_fields` names.
    num_samples: Number of points to select from each instance.

  Returns:
    A tuple `(input_tensors, output_tensors)` of dictionaries holding the
    sampled tensors.
  """
  instance_key = standard_fields.InputDataFields.object_instance_id_image
  flat_instance_ids = tf.reshape(inputs[instance_key], [-1])
  relabelled_ids = isu.map_labels_to_0_to_n(flat_instance_ids)

  picked = isu.randomly_select_n_points_per_segment(
      labels=relabelled_ids,
      num_points=num_samples,
      include_ignore_label=False)
  picked = tf.reshape(picked, [-1])

  # Seed the result with the relabelled ids, then add every gathered field.
  input_tensors = {instance_key: relabelled_ids}
  input_tensors.update({
      name: tf.gather(inputs[name], picked)
      for name in standard_fields.get_input_image_fields()
      if name in inputs
  })
  output_tensors = {
      name: tf.gather(outputs[name], picked)
      for name in standard_fields.get_output_image_fields()
      if name in outputs
  }
  return input_tensors, output_tensors
def instance_embedding_iou_loss(embedding,
                                instance_labels,
                                num_samples,
                                similarity_strategy='dotproduct'):
  """Mask-weighted squared-error loss on soft masks grown from seed points.

  `num_samples` random seed points are drawn from each instance. For every
  seed a soft mask over all pixels is built from the similarity between the
  seed embedding and each pixel embedding (delegated to
  `eutils.embedding_centers_to_soft_masks`; for the dot-product strategy the
  mask value is presumably exp(e.s)/exp(s.s) for pixel embedding e and seed
  embedding s -- confirm against that helper). Ideally pixels of the seed's
  instance get a mask value close to 1 and all other pixels a value close to 0.

  Each soft mask is compared with the binary ground-truth mask of the seed's
  instance using the per-pixel squared error, and four differently weighted
  means of that error are averaged:
    1. weighted by the ground-truth mask (pixels inside the instance),
    2. weighted by its complement (pixels outside the instance),
    3. weighted by |soft mask - ground truth| (the disagreement region),
    4. weighted by max(soft mask, ground truth) (a soft union).

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing instance
      ids. Assumed values in target start from 0 and cover 0 to N-1.
    num_samples: Number of seed points drawn from each instance.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.

  Returns:
    Scalar tensor of type tf.float32: the mean of the four weighted losses.
  """
  embedding_shape = tf.shape(embedding)
  height = embedding_shape[0]
  width = embedding_shape[1]
  dims = embedding_shape[2]
  embedding = tf.reshape(embedding, tf.stack([-1, dims]))
  instance_labels = tf.reshape(instance_labels, [-1])
  num_inst = tf.reduce_max(instance_labels) + 1

  # One embedding per (sample, instance) pair acts as a mask seed.
  indices = isu.randomly_select_n_points_per_segment(instance_labels,
                                                     num_samples)
  indices = tf.reshape(indices, [-1])
  init_centers = tf.gather(embedding, indices)
  soft_masks = eutils.embedding_centers_to_soft_masks(
      embedding, init_centers, similarity_strategy)
  soft_masks = tf.reshape(soft_masks,
                          tf.stack([num_samples * num_inst, height, width]))

  # Binary ground-truth masks, repeated once per sample so that they line up
  # with the soft masks: [num_samples * num_inst, height, width].
  instance_masks = tf.one_hot(instance_labels, num_inst, dtype=tf.float32)
  instance_masks = tf.transpose(instance_masks)
  instance_masks = tf.tile(
      tf.expand_dims(instance_masks, axis=0), tf.stack([num_samples, 1, 1]))
  instance_masks = tf.reshape(
      instance_masks, tf.stack([num_samples * num_inst, height, width]))

  flat_soft_masks = tf.reshape(soft_masks, [-1])
  flat_instance_masks = tf.reshape(instance_masks, [-1])

  # Per-pixel squared error. tf.keras.losses.MeanSquaredError would average
  # over the last axis of these rank-1 tensors and return one scalar, which
  # makes the per-pixel weights below meaningless, so the element-wise op is
  # used instead.
  losses = tf.math.squared_difference(flat_soft_masks, flat_instance_masks)

  # Loss on pixels inside ground-truth segment
  loss1 = tf.reduce_mean(losses * flat_instance_masks)
  # Loss on pixels outside ground-truth segment
  loss2 = tf.reduce_mean(losses * (1.0 - flat_instance_masks))
  # Loss on pixels in the difference between ground-truth and predicted
  # segments
  diff_weights = tf.maximum(soft_masks, instance_masks) - tf.minimum(
      soft_masks, instance_masks)
  loss3 = tf.reduce_mean(losses * tf.reshape(diff_weights, [-1]))
  # Loss weighted by the soft union of ground-truth and predicted segments
  union_weights = tf.maximum(instance_masks, soft_masks)
  loss4 = tf.reduce_mean(losses * tf.reshape(union_weights, [-1]))
  return (loss1 + loss2 + loss3 + loss4) / 4.0
def instance_embedding_npair_random_sample_loss(
    embedding,
    instance_labels,
    num_samples,
    similarity_strategy='dotproduct',
    loss_strategy='softmax'):
  """Computes an n-pair loss between random samples of every instance.

  With n samples drawn from each of the k instances, a [k x n x n, k] matrix
  of similarities is formed together with a matching target matrix. The
  selected loss is evaluated on the rows of that matrix and averaged.

  Pixels with a negative instance label are dropped before sampling.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing instance
      ids. Assumed values in target start from 0 and cover 0 to N-1.
    num_samples: Number of sampled points from each label.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If loss or similarity strategy are unknown.
  """
  feature_dims = tf.shape(embedding)[2]
  flat_embedding = tf.reshape(embedding, tf.stack([-1, feature_dims]))
  flat_labels = tf.reshape(instance_labels, [-1])

  # Keep only pixels that carry a non-negative instance id.
  keep = tf.greater_equal(flat_labels, 0)
  flat_embedding = tf.boolean_mask(flat_embedding, keep)
  flat_labels = tf.boolean_mask(flat_labels, keep)
  num_instances = tf.reduce_max(flat_labels) + 1

  def _one_hot_labels(point_indices):
    """Returns one-hot instance labels of the given points."""
    return tf.one_hot(tf.gather(flat_labels, point_indices), num_instances)

  # num_samples (ns) x num_instances (ni)
  picks = isu.randomly_select_n_points_per_segment(flat_labels, num_samples)
  picks_ns_ni = tf.reshape(picks, [-1])
  picks_ni_ns = tf.reshape(tf.transpose(picks), [-1])

  embedding_ns_ni = tf.gather(flat_embedding, picks_ns_ni)
  embedding_ni_ns = tf.gather(flat_embedding, picks_ni_ns)
  labels_ns_ni = _one_hot_labels(picks_ns_ni)
  labels_ni_ns = _one_hot_labels(picks_ni_ns)

  # [nc x ns, ns x nc]
  targets = tf.matmul(labels_ni_ns, labels_ns_ni, transpose_b=True)
  logits = eutils.embedding_centers_to_logits(
      embedding_ns_ni, embedding_ni_ns, similarity_strategy)
  targets = tf.reshape(targets, [-1, num_instances])
  logits = tf.reshape(logits, [-1, num_instances])

  if loss_strategy not in ('softmax', 'sigmoid'):
    raise ValueError('Loss strategy is unknown')

  no_reduction = tf.keras.losses.Reduction.NONE
  if loss_strategy == 'softmax':
    criterion = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=no_reduction)
  else:
    if similarity_strategy == 'distance':
      # Positive targets are halved for the distance-based similarity.
      targets = targets * 0.5
    criterion = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=no_reduction)
  return tf.reduce_mean(criterion(y_true=targets, y_pred=logits))
def instance_embedding_npair_random_center_random_sample_loss(
    embedding,
    instance_labels,
    num_samples,
    similarity_strategy='dotproduct',
    loss_strategy='softmax'):
  """Computes an n-pair loss between random centers and random samples.

  This loss closely follows instance_embedding_npair_random_center_loss. The
  difference is that the similarity to the seeds (centers) is evaluated only
  for a fixed number of points sampled from every instance rather than for
  every pixel. Instances therefore contribute evenly to the loss, and large
  object instances no longer supply an overwhelming share of its terms, which
  is a limitation inherent to `npair_loss`.

  Pixels with a negative instance label are dropped before sampling.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing instance
      ids. Assumed values in target start from 0 and cover 0 to N-1.
    num_samples: Number of sampled points from each label.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If strategy is unknown.
  """
  flat_labels = tf.reshape(instance_labels, [-1])
  feature_dims = tf.shape(embedding)[2]
  flat_embedding = tf.reshape(embedding, tf.stack([-1, feature_dims]))

  # Keep only pixels that carry a non-negative instance id.
  keep = tf.greater_equal(flat_labels, 0)
  flat_embedding = tf.boolean_mask(flat_embedding, keep)
  flat_labels = tf.boolean_mask(flat_labels, keep)

  # One random center per instance, plus one-hot labels for the kept pixels.
  center_indices, label_one_hot = isu.randomly_select_one_point_per_segment(
      flat_labels)
  centers = tf.gather(flat_embedding, center_indices)

  picks = isu.randomly_select_n_points_per_segment(flat_labels, num_samples)
  picks = tf.reshape(picks, [-1])
  picked_embedding = tf.gather(flat_embedding, picks)
  targets = tf.gather(label_one_hot, picks)

  logits = eutils.embedding_centers_to_logits(centers, picked_embedding,
                                              similarity_strategy)

  if loss_strategy not in ('softmax', 'sigmoid'):
    raise ValueError('Loss strategy is unknown')

  no_reduction = tf.keras.losses.Reduction.NONE
  if loss_strategy == 'softmax':
    criterion = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=no_reduction)
  else:
    if similarity_strategy == 'distance':
      # Positive targets are halved for the distance-based similarity.
      targets = targets * 0.5
    criterion = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=no_reduction)
  return tf.reduce_mean(criterion(y_true=targets, y_pred=logits))