def get_support_set_softmax(self, logits, class_ids):
  """Softmax normalize over the support set.

  Args:
    logits: [N_k, H*W, Q] dimensional tensor.
    class_ids: [N_k] tensor giving the support-set-id of each image.

  Returns:
    Softmax-ed x over the support set.

    softmax(x) = np.exp(x) / np.sum(np.exp(x), axis)
  """
  max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
  max_logit = tf.math.unsorted_segment_max(max_logit, class_ids,
                                           tf.reduce_max(class_ids) + 1)
  max_logit = tf.gather(max_logit, class_ids)
  logits_reduc = logits - max_logit
  exp_x = tf.exp(logits_reduc)
  sum_exp_x = tf.reduce_sum(exp_x, axis=1, keepdims=True)
  sum_exp_x = tf.math.unsorted_segment_sum(sum_exp_x, class_ids,
                                           tf.reduce_max(class_ids) + 1)
  log_sum_exp_x = tf.log(sum_exp_x)
  log_sum_exp_x = tf.gather(log_sum_exp_x, class_ids)
  norm_logits = logits_reduc - log_sum_exp_x
  softmax = tf.exp(norm_logits)
  return softmax

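# Minimal standalone sketch (toy shapes, not part of the class above) of the
# grouped normalization that get_support_set_softmax performs: per-image
# exponential sums are pooled across images sharing a support-set id with
# unsorted_segment_sum, so each logit is normalized over its whole set rather
# than over a single image. The max-subtraction above is only for numerical
# stability and is omitted here.
def _grouped_softmax_sketch():
  logits = tf.random.normal([4, 6, 3])      # [N_k=4 images, H*W=6, Q=3]
  class_ids = tf.constant([0, 0, 1, 1])     # support-set id of each image
  num_sets = tf.reduce_max(class_ids) + 1
  exp_x = tf.exp(logits)
  sum_exp = tf.reduce_sum(exp_x, axis=1, keepdims=True)       # per image
  sum_exp = tf.gather(
      tf.math.unsorted_segment_sum(sum_exp, class_ids, num_sets), class_ids)
  return exp_x / sum_exp                    # sums to 1 within each set
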
def validate_model_independence(self, labels, log_probs, task_parameters):
  """Partition gradients into those assumed active and inactive."""
  num_task_parameters = len(task_parameters)
  # pylint: disable=g-complex-comprehension
  on_gradients = [[
      tf.norm(tensor=on_gradient) for on_gradient in on_gradients
  ] for on_gradients in [
      tf.gradients(
          ys=tf.gather(log_probs, tf.compat.v1.where(tf.equal(labels, i))),
          xs=task_parameters[i * num_task_parameters:(i + 1) *
                             num_task_parameters]) for i in range(1)
  ]]
  off_gradients = [[
      tf.norm(tensor=off_gradient) for off_gradient in off_gradients
  ] for off_gradients in [
      tf.gradients(
          ys=tf.gather(log_probs, tf.compat.v1.where(tf.equal(labels, i))),
          xs=task_parameters[i * num_task_parameters:(i + 1) *
                             num_task_parameters]) for i in range(1)
  ]]
  # pylint: enable=g-complex-comprehension
  return (list(itertools.chain.from_iterable(on_gradients)),
          list(itertools.chain.from_iterable(off_gradients)))

def select_slate_greedy(slate_size, s_no_click, s, q):
  """Selects the slate using the adaptive greedy algorithm.

  This algorithm corresponds to the method "GS" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    slate_size: int, the size of the recommendation slate.
    s_no_click: float tensor, the score for not clicking any document.
    s: [num_of_documents] tensor, the scores for clicking documents.
    q: [num_of_documents] tensor, the predicted q values for documents.

  Returns:
    [slate_size] tensor, the selected slate.
  """

  def argmax(v, mask):
    return tf.argmax((v - tf.reduce_min(v) + 1) * mask, axis=0)

  numerator = tf.constant(0.)
  denominator = tf.constant(0.) + s_no_click
  mask = tf.ones(tf.shape(q)[0])

  def set_element(v, i, x):
    mask = tf.one_hot(i, tf.shape(v)[0])
    v_new = tf.ones_like(v) * x
    return tf.where(tf.equal(mask, 1), v_new, v)

  for _ in range(slate_size):
    k = argmax((numerator + s * q) / (denominator + s), mask)
    mask = set_element(mask, k, 0)
    numerator = numerator + tf.gather(s * q, k)
    denominator = denominator + tf.gather(s, k)

  output_slate = tf.where(tf.equal(mask, 0))
  return output_slate

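# Hedged usage sketch for select_slate_greedy with made-up scores; it runs
# as-is under eager TF 2.x (under TF 1.x graph mode the result needs a session
# run). Note that the returned tensor holds document indices with shape
# [slate_size, 1], since tf.where yields 2-D coordinates.
example_s_no_click = tf.constant(1.0)
example_s = tf.constant([0.4, 1.2, 0.7, 0.1])   # click scores per document
example_q = tf.constant([1.0, 0.2, 0.8, 2.0])   # predicted q values per document
example_slate = select_slate_greedy(
    slate_size=2, s_no_click=example_s_no_click, s=example_s, q=example_q)
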
def _restrict_to_source(self, one_hot_labels, source):
  """Returns the slice of one_hot_labels corresponding to source."""
  return tf.slice(
      one_hot_labels,
      begin=[0, tf.gather(self._start_inds_for_sources, source)],
      size=[tf.shape(one_hot_labels)[0],
            tf.gather(self.logit_dim, source)])

def compute_train_class_proportions(episode, shots, dataset_spec):
  """Computes the proportion of each class' examples in the support set.

  Args:
    episode: An EpisodeDataset.
    shots: A 1D Tensor whose length is the `way' of the episode that stores the
      shots for this episode.
    dataset_spec: A DatasetSpecification.

  Returns:
    class_props: A 1D Tensor whose length is the `way' of the episode, storing
      for each class the proportion of its examples that are in the support
      set.
  """
  # Get the total number of examples of each class in the dataset.
  num_dataset_classes = len(dataset_spec.images_per_class)
  num_images_per_class = [
      dataset_spec.get_total_images_per_class(class_id)
      for class_id in range(num_dataset_classes)
  ]

  # Get the (absolute) class ID's that appear in the episode.
  class_ids, _ = tf.unique(episode.train_class_ids)  # [?, ]

  # Make sure that class_ids are valid indices of num_images_per_class. This is
  # important since tf.gather will fail silently and return zeros otherwise.
  num_classes = tf.shape(num_images_per_class)[0]
  check_valid_inds_op = tf.assert_less(class_ids, num_classes)
  with tf.control_dependencies([check_valid_inds_op]):
    # Get the total number of examples of each class that is in the episode.
    num_images_per_class = tf.gather(num_images_per_class, class_ids)  # [?, ]

  # Get the proportions of examples of each class that appear in the train set.
  class_props = tf.truediv(shots, num_images_per_class)
  return class_props

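# Minimal numeric sketch (toy values, no EpisodeDataset / DatasetSpecification
# needed) of the proportion computed above: a class with 100 dataset images and
# 10 support-set shots contributes 10 / 100 = 0.1. tf.truediv casts the integer
# tensors to float.
example_shots = tf.constant([10, 5])        # support shots per episode class
example_totals = tf.constant([100, 50])     # dataset images per episode class
example_props = tf.truediv(example_shots, example_totals)   # [0.1, 0.1]
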
def update_state(self, inputs, outputs):
  """Function that updates the metric state at each example.

  Args:
    inputs: A dictionary containing input tensors.
    outputs: A dictionary containing output tensors.

  Returns:
    Update op.
  """
  detections_score = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_score], [-1])
  detections_class = tf.reshape(
      outputs[standard_fields.DetectionResultFields.objects_class], [-1])
  num_detections = tf.shape(detections_score)[0]
  detections_instance_mask = tf.reshape(
      outputs[
          standard_fields.DetectionResultFields.instance_segments_voxel_mask],
      [num_detections, -1])
  gt_class = tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                        [-1])
  num_gt = tf.shape(gt_class)[0]
  gt_voxel_instance_ids = tf.reshape(
      inputs[standard_fields.InputDataFields.object_instance_id_voxels], [-1])
  gt_instance_masks = tf.transpose(
      tf.one_hot(gt_voxel_instance_ids - 1, depth=num_gt, dtype=tf.float32))
  for c in self.class_range:
    gt_mask_c = tf.equal(gt_class, c)
    num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
    gt_instance_masks_c = tf.boolean_mask(gt_instance_masks, gt_mask_c)
    detections_mask_c = tf.equal(detections_class, c)
    num_detections_c = tf.math.reduce_sum(
        tf.cast(detections_mask_c, dtype=tf.int32))
    if num_detections_c == 0:
      continue
    det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
    det_instance_mask_c = tf.boolean_mask(detections_instance_mask,
                                          detections_mask_c)
    det_scores_c, sorted_indices = tf.math.top_k(
        det_scores_c, k=num_detections_c)
    det_instance_mask_c = tf.gather(det_instance_mask_c, sorted_indices)
    tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
    if num_gt_c > 0:
      ious_c = instance_segmentation_utils.points_mask_iou(
          masks1=gt_instance_masks_c, masks2=det_instance_mask_c)
      max_overlap_gt_ids = tf.cast(
          tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
      is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
      for i in tf.range(num_detections_c):
        gt_id = max_overlap_gt_ids[i]
        if (ious_c[gt_id, i] > self.iou_threshold and
            is_gt_box_detected[gt_id] == 0):
          tp_c = tf.maximum(
              tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
          is_gt_box_detected = tf.maximum(
              tf.one_hot(gt_id, num_gt_c, dtype=tf.int32), is_gt_box_detected)
    self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
    self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
    self.num_gt[c] += num_gt_c
  return tf.no_op()

def spatial_loss(truth_features, predicted_features, space_desc):
  feature_losses = []
  for truth, prediction, spec in zip(truth_features, predicted_features,
                                     space_desc.features):
    if spec.type == FeatureType.CATEGORICAL:
      truth = tf.transpose(truth, (0, 2, 3, 1))
      prediction = tf.transpose(prediction, (0, 2, 3, 1))
      feature_losses.append(
          tf.losses.softmax_cross_entropy(truth, prediction))

      summary_image = tf.argmax(tf.concat([truth, prediction], 2), 3)
      summary_image = tf.gather(
          palette[space_desc.index][spec.index], summary_image)
      tf.summary.image(spec.name, summary_image)
    else:
      feature_losses.append(
          tf.losses.mean_squared_error(truth, prediction))

      summary_image = tf.concat([truth, prediction], 3)
      tf.summary.image(spec.name,
                       tf.transpose(summary_image, (0, 2, 3, 1)))

    tf.summary.scalar(spec.name, feature_losses[-1])

  return tf.reduce_mean(tf.stack(feature_losses))

def forward_pass_fc(self, embeddings, source):
  """Computes logits for embeddings using the separate head of `source`."""
  start_idx = tf.gather(self._start_inds_for_sources, source)
  num_classes = self.logit_dim  # A list of the datasets' numbers of classes.
  with tf.variable_scope('fc', reuse=tf.AUTO_REUSE):
    logits = functional_classifiers.separate_head_linear_classifier(
        embeddings, num_classes, source, start_idx, self.cosine_classifier,
        self.cosine_logits_multiplier)
    return logits

def inner_objective(self, onehot_labels, predictions, iteration_idx):
  """Compute the inner-loop objective."""
  # p(z, y), joint log-likelihood.
  joint_log_probs = self.joint_log_likelihood(onehot_labels, predictions)
  labels = tf.expand_dims(tf.argmax(input=onehot_labels, axis=-1), axis=-1)
  numerator = tf.gather(joint_log_probs, labels, axis=-1, batch_dims=1)

  # p(z), normalization constant.
  evidence = tf.reduce_logsumexp(
      input_tensor=joint_log_probs, axis=-1, keepdims=True)

  # p(y | z) if interpolation coefficient > 0 else p(z, y).
  # TODO(eringrant): This assumes that `interp` is either 1 or 0.
  # Adapt to a hybridized approach.
  interp = tf.gather(self.gen_disc_interpolation, iteration_idx)
  scale = tf.cond(
      pred=interp > 0.0,
      true_fn=lambda: 1.0,
      false_fn=lambda: self.generative_scaling)
  return -scale * tf.reduce_mean(
      input_tensor=numerator - interp * evidence, axis=0)

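# Toy numeric sketch of the normalization used in inner_objective: subtracting
# the log-evidence (logsumexp over classes) from the joint log-likelihood of
# the true label gives the conditional log p(y | z); with interp == 0 the
# objective keeps the unnormalized joint term instead. Values are made up.
example_joint = tf.constant([[-2.3, -0.9, -1.7]])    # [batch=1, num_classes=3]
example_labels = tf.constant([[1]])                   # index of the true class
example_numerator = tf.gather(
    example_joint, example_labels, axis=-1, batch_dims=1)        # log p(z, y)
example_evidence = tf.reduce_logsumexp(
    example_joint, axis=-1, keepdims=True)                       # log p(z)
example_log_p_y_given_z = example_numerator - example_evidence   # log p(y | z)
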
def tf_random_choice(inputs, n_samples):
  """Samples rows of `inputs` uniformly at random, with replacement.

  Params:
    inputs (Tensor): Shape [n_states, n_features]
    n_samples (int): The number of random samples to take.

  Returns:
    sampled_inputs (Tensor): Shape [n_samples, n_features]
  """
  # (1, n_states) since multinomial requires 2D logits.
  uniform_log_prob = tf.expand_dims(tf.zeros(tf.shape(inputs)[0]), 0)

  ind = tf.multinomial(uniform_log_prob, n_samples)
  ind = tf.squeeze(ind, 0, name="random_choice_ind")  # (n_samples,)

  return tf.gather(inputs, ind, name="random_choice")

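# Hedged usage sketch for tf_random_choice with toy states. tf.multinomial is
# the TF 1.x op; a TF 2.x port would use tf.random.categorical instead.
example_states = tf.constant([[0., 1.], [2., 3.], [4., 5.]])   # [n_states=3, 2]
example_batch = tf_random_choice(example_states, n_samples=5)   # [5, 2] rows,
                                                                # with replacement
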
def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the class logits for the episode.

  Args:
    support_embeddings: A Tensor of size [num_support_images, embedding dim].
    query_embeddings: A Tensor of size [num_query_images, embedding dim].
    onehot_support_labels: A Tensor of size [batch size, way].

  Returns:
    The query set logits as a [num_query_images, way] matrix.

  Raises:
    ValueError: Distance must be one of l2 or cosine.
  """
  if self.knn_in_fc:
    # Recompute the support and query embeddings that were originally computed
    # in self.forward_pass() to be the fc layer activations.
    support_embeddings = self.forward_pass_fc(support_embeddings)
    query_embeddings = self.forward_pass_fc(query_embeddings)

  # ------------------------ K-NN look up -------------------------------
  # For each testing example in an episode, we use its embedding
  # vector to look for the closest neighbor in all the training examples'
  # embeddings from the same episode and then assign the training example's
  # class label to the testing example as the predicted class label for it.
  if self.distance == 'l2':
    # [1, num_support, embed_dims]
    support_embeddings = tf.expand_dims(support_embeddings, axis=0)
    # [num_query, 1, embed_dims]
    query_embeddings = tf.expand_dims(query_embeddings, axis=1)
    # [num_query, num_support]
    distance = tf.norm(query_embeddings - support_embeddings, axis=2)
  elif self.distance == 'cosine':
    support_embeddings = tf.nn.l2_normalize(support_embeddings, axis=1)
    query_embeddings = tf.nn.l2_normalize(query_embeddings, axis=1)
    distance = -1 * tf.matmul(
        query_embeddings, support_embeddings, transpose_b=True)
  else:
    raise ValueError('Distance must be one of l2 or cosine.')
  # [num_query]
  _, indices = tf.nn.top_k(-distance, k=1)
  indices = tf.squeeze(indices, axis=1)
  # [num_query, num_classes]
  query_logits = tf.gather(onehot_support_labels, indices)
  return query_logits

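# Standalone sketch (toy tensors, outside the class) of the 1-nearest-neighbor
# lookup used above with the 'l2' distance: each query picks its closest
# support embedding and copies that example's one-hot label as its logits.
example_support = tf.constant([[0., 0.], [1., 1.]])     # [num_support=2, dim=2]
example_queries = tf.constant([[0.1, 0.], [0.9, 1.2]])  # [num_query=2, dim=2]
example_onehot = tf.constant([[1, 0], [0, 1]])          # [num_support, way]
example_dists = tf.norm(
    tf.expand_dims(example_queries, 1) - tf.expand_dims(example_support, 0),
    axis=2)
_, example_nn = tf.nn.top_k(-example_dists, k=1)         # nearest support index
example_logits = tf.gather(example_onehot, tf.squeeze(example_nn, axis=1))
# -> [[1, 0], [0, 1]]
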
def entropy_loss(self):
  with tf.name_scope('entropy_loss'):
    entropies = [dist.entropy() for name, dist in self.model.policy.items()]
    entropy = tf.reduce_mean(tf.add_n(entropies))
    entropy_loss = -entropy * self.entropy_factor

    entropy_masked = tf.stack(entropies, axis=-1) * tf.gather(
        self.function_args_mask, self.input_actions['function_id'])
    entropy_masked = tf.reduce_mean(tf.reduce_sum(entropy_masked, axis=-1))

    tf.summary.scalar('policy_entropy', entropy, family='entropy')
    tf.summary.scalar(
        'policy_entropy_masked', entropy_masked, family='entropy')
    tf.summary.scalar('entropy_loss', entropy_loss, family='losses')

    return entropy_loss

def policy_loss(self):
  with tf.name_scope('policy_loss'):
    log_probs = [
        dist.log_prob(self.input_actions[name])
        for name, dist in self.model.policy.items()
    ]
    log_probs = tf.stack(log_probs, axis=-1)
    log_probs = log_probs * tf.gather(
        self.function_args_mask, self.input_actions['function_id'])

    advantage = self.input_returns - self.model.value
    policy_loss = -tf.reduce_mean(
        tf.reduce_sum(log_probs, axis=-1) *
        tf.stop_gradient(advantage)) * self.policy_factor

    tf.summary.scalar('policy_loss', policy_loss, family='losses')

    return policy_loss

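# Sketch (made-up mask) of the masking shared by entropy_loss and policy_loss:
# function_args_mask is assumed to be a [num_functions, num_arg_heads] 0/1
# table, and gathering the row of each sampled function_id zeroes out the
# argument heads that the chosen function does not use.
example_args_mask = tf.constant([[1., 0.], [1., 1.]])   # 2 functions, 2 arg heads
example_function_ids = tf.constant([0, 1, 0])           # sampled per batch item
example_per_head = tf.ones([3, 2])                      # e.g. entropies or log-probs
example_masked = example_per_head * tf.gather(example_args_mask,
                                              example_function_ids)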