def compute_logits(self, support_embeddings, query_embeddings,
                   onehot_support_labels):
  """Computes the relation score of each query example to each prototype."""
  # [n_test, 21, 21, n_features].
  query_embed_shape = query_embeddings.shape.as_list()
  n_feature = query_embed_shape[3]
  out_shape = query_embed_shape[1:3]
  n_test = tf.shape(query_embeddings)[0]

  # [n_test, num_classes, 21, 21, n_feature].
  # It is okay for one of the elements in the list to be a tensor.
  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)
  prototype_extended = tf.tile(
      tf.expand_dims(prototypes, 0), [n_test, 1, 1, 1, 1])

  # [n_test, num_classes, 21, 21, n_feature].
  query_f_extended = tf.tile(
      tf.expand_dims(query_embeddings, 1),
      [1, tf.shape(onehot_support_labels)[-1], 1, 1, 1])
  relation_pairs = tf.concat((prototype_extended, query_f_extended), 4)
  # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [n_feature*2]
  relation_pairs = tf.reshape(relation_pairs,
                              [-1] + out_shape + [n_feature * 2])
  relationnet_dict = functional_backbones.relation_module(
      relation_pairs, 'relationnet')
  way = tf.shape(onehot_support_labels)[-1]
  relations = tf.reshape(relationnet_dict['output'], [-1, way])
  return relations

def compute_class_distances(self, support_embeddings, onehot_support_labels,
                            query_embeddings):
  """Return the relation score of each query example to each prototype."""
  # `query_embeddings` is [num_examples, 21, 21, num_features].
  out_shape = query_embeddings.shape.as_list()[1:]
  num_features = out_shape[-1]
  num_query_examples = tf.shape(input=query_embeddings)[0]

  # [num_classes, 21, 21, num_features].
  prototypes = compute_prototypes(support_embeddings, onehot_support_labels)

  # [num_query_examples, num_classes, 21, 21, num_features].
  prototype_extended = tf.tile(
      tf.expand_dims(prototypes, 0),
      [num_query_examples] + [1] * (1 + len(out_shape)))

  # [num_query_examples, num_classes, 21, 21, num_features].
  way = onehot_support_labels.shape.as_list()[-1]
  query_extended = tf.tile(
      tf.expand_dims(query_embeddings, 1), [1, way] + [1] * len(out_shape))
  relation_pairs = tf.concat((prototype_extended, query_extended),
                             len(out_shape) + 1)

  # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [num_features*2]
  relation_pairs = tf.reshape(relation_pairs,
                              [-1] + out_shape[:-1] + [num_features * 2])

  return tf.reshape(self.relation_module_fn(relation_pairs), [-1, way])

def _build_target_distribution(self):
  self._reshape_networks()
  batch_size = tf.shape(self._replay.rewards)[0]
  # size of rewards: batch_size x 1
  rewards = self._replay.rewards[:, None]
  # size of tiled_support: batch_size x num_atoms
  tiled_support = tf.tile(self.support, [batch_size])
  tiled_support = tf.reshape(tiled_support, [batch_size, self.num_atoms])
  # size of target_support: batch_size x num_atoms

  is_terminal_multiplier = 1. - tf.cast(self._replay.terminals, tf.float32)
  # Incorporate terminal state to discount factor.
  # size of gamma_with_terminal: batch_size x 1
  gamma_with_terminal = self.cumulative_gamma * is_terminal_multiplier
  gamma_with_terminal = gamma_with_terminal[:, None]

  target_support = rewards + gamma_with_terminal * tiled_support

  # size of next_probabilities: batch_size x num_actions x num_atoms
  next_probabilities = tf.contrib.layers.softmax(self._replay_next_logits)
  # size of next_qt: batch_size x num_actions
  next_qt = tf.reduce_sum(self.support * next_probabilities, 2)
  # size of next_qt_argmax: batch_size x 1
  next_qt_argmax = tf.argmax(
      next_qt + self._replay.next_legal_actions, axis=1)[:, None]
  batch_indices = tf.range(tf.to_int64(batch_size))[:, None]
  # size of next_qt_argmax: batch_size x 2
  next_qt_argmax = tf.concat([batch_indices, next_qt_argmax], axis=1)
  # size of next_probabilities: batch_size x num_atoms
  next_probabilities = tf.gather_nd(next_probabilities, next_qt_argmax)
  return project_distribution(target_support, next_probabilities,
                              self.support)

def embedding_regularization_loss(inputs,
                                  outputs,
                                  lambda_coef=0.0001,
                                  regularization_type='unit_length',
                                  is_intermediate=False):
  """Embedding regularization loss.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    lambda_coef: Regularization loss coefficient.
    regularization_type: Regularization loss type. Supported values are 'msq'
      and 'unit_length'. 'msq' stands for 'mean square' which penalizes the
      embedding vectors if they have a length far from zero. 'unit_length'
      penalizes the embedding vectors if they have a length far from one.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  valid_mask = tf.reshape(valid_mask, [-1])
  embedding_dims = outputs[embedding_key].get_shape().as_list()[-1]
  if embedding_dims is None:
    raise ValueError(
        'Embedding dimension is unknown at graph construction time.')
  embedding = tf.reshape(outputs[embedding_key], [-1, embedding_dims])
  embedding = tf.boolean_mask(embedding, valid_mask)
  return metric_learning_losses.regularization_loss(
      embedding=embedding,
      lambda_coef=lambda_coef,
      regularization_type=regularization_type)

def compute_target_optimal_q(reward, gamma, next_actions, next_q_values,
                             next_states, terminals):
  """Builds an op used as a target for the Q-value.

  This algorithm corresponds to the method "OT" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for
      the user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
  scores, score_no_click = _get_unnormalized_scores(next_states)

  # Obtain all possible slates given current docs in the candidate set.
  slate_size = next_actions.get_shape().as_list()[1]
  num_candidates = next_q_values.get_shape().as_list()[1]
  mesh_args = [list(range(num_candidates))] * slate_size
  slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
  slates = tf.reshape(slates, shape=(-1, slate_size))
  # Filter slates that include duplicates to ensure each document is picked
  # at most once.
  unique_mask = tf.map_fn(
      lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
      slates,
      dtype=tf.bool)
  # [num_of_slates, slate_size]
  slates = tf.boolean_mask(tensor=slates, mask=unique_mask)

  # [batch_size, num_of_slates, slate_size]
  next_q_values_slate = tf.gather(next_q_values, slates, axis=1)
  # [batch_size, num_of_slates, slate_size]
  scores_slate = tf.gather(scores, slates, axis=1)
  # [batch_size, num_of_slates]
  batch_size = next_states.get_shape().as_list()[0]
  score_no_click_slate = tf.reshape(
      tf.tile(score_no_click, tf.shape(input=slates)[:1]), [batch_size, -1])

  # [batch_size, num_of_slates]
  next_q_target_slate = tf.reduce_sum(
      input_tensor=next_q_values_slate * scores_slate, axis=2) / (
          tf.reduce_sum(input_tensor=scores_slate, axis=2) +
          score_no_click_slate)

  next_q_target_max = tf.reduce_max(input_tensor=next_q_target_slate, axis=1)

  return reward + gamma * next_q_target_max * (
      1. - tf.cast(terminals, tf.float32))

def fn():
  """Loss function for when number of input and output boxes is positive."""
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=input_boxes_instance_id)
  else:
    weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                      dtype=tf.float32)
  normalized_box_size = 5.0
  predicted_boxes_length = output_boxes_length
  predicted_boxes_height = output_boxes_height
  predicted_boxes_width = output_boxes_width
  predicted_boxes_center = output_boxes_center
  predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
  gt_boxes_length = input_boxes_length
  gt_boxes_height = input_boxes_height
  gt_boxes_width = input_boxes_width
  gt_boxes_center = input_boxes_center
  gt_boxes_rotation_matrix = input_boxes_rotation_matrix
  if loss_type in ['normalized_huber', 'normalized_euclidean']:
    predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
    predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
    predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
    gt_boxes_length = tf.ones_like(
        gt_boxes_length, dtype=tf.float32) * normalized_box_size
    gt_boxes_height = tf.ones_like(
        gt_boxes_height, dtype=tf.float32) * normalized_box_size
    gt_boxes_width = tf.ones_like(
        gt_boxes_width, dtype=tf.float32) * normalized_box_size
  gt_box_corners = box_utils.get_box_corners_3d(
      boxes_length=gt_boxes_length,
      boxes_height=gt_boxes_height,
      boxes_width=gt_boxes_width,
      boxes_rotation_matrix=gt_boxes_rotation_matrix,
      boxes_center=gt_boxes_center)
  predicted_box_corners = box_utils.get_box_corners_3d(
      boxes_length=predicted_boxes_length,
      boxes_height=predicted_boxes_height,
      boxes_width=predicted_boxes_width,
      boxes_rotation_matrix=predicted_boxes_rotation_matrix,
      boxes_center=predicted_boxes_center)
  corner_weights = tf.tile(weights, [1, 8])
  if loss_type in ['huber', 'normalized_huber']:
    loss_fn = tf.keras.losses.Huber(
        delta=delta, reduction=tf.keras.losses.Reduction.NONE)
  elif loss_type in [
      'normalized_absolute_difference', 'absolute_difference'
  ]:
    loss_fn = tf.keras.losses.MeanAbsoluteError(
        reduction=tf.keras.losses.Reduction.NONE)
  else:
    raise ValueError(('Unknown loss type %s.' % loss_type))
  box_corner_losses = loss_fn(
      y_true=tf.reshape(gt_box_corners, [-1, 3]),
      y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
  return tf.reduce_mean(box_corner_losses * tf.reshape(corner_weights, [-1]))

def _build_bisimulation_target(self):
  """Build the bisimulation target."""
  batch_size = tf.shape(self.rewards_ph)[0]
  r1 = tf.tile([self.rewards_ph], [batch_size, 1])
  r2 = tf.transpose(r1)
  reward_differences = tf.abs(r1 - r2)
  reward_differences = tf.reshape(reward_differences, (batch_size**2, 1))
  next_state_distances = self.bisim_horizon_ph * self.s2_target_distances
  return reward_differences + self.gamma * next_state_distances

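# Standalone sketch of the pairwise |r_i - r_j| term built above (assumes a
# 1-D float tensor of per-transition rewards; illustrative only, not part of
# the original class).
def _pairwise_reward_differences_example(rewards):
  batch_size = tf.shape(rewards)[0]
  r1 = tf.tile([rewards], [batch_size, 1])  # Row i holds the whole reward batch.
  r2 = tf.transpose(r1)  # Row i repeats reward i.
  return tf.reshape(tf.abs(r1 - r2), (batch_size**2, 1))
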
def classification_loss_using_mask_iou_func_unbatched(
    embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
    sampled_class_labels, sampled_logits, similarity_strategy, is_balanced):
  """Classification loss using mask iou.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.

  Returns:
    A tf.float32 loss scalar tensor.
  """
  predicted_soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      embedding=embeddings,
      centers=sampled_embeddings,
      similarity_strategy=similarity_strategy)
  predicted_masks = tf.cast(
      tf.greater(predicted_soft_masks, 0.5), dtype=tf.float32)
  gt_masks = tf.cast(
      tf.equal(
          tf.expand_dims(sampled_instance_ids, axis=1),
          tf.expand_dims(instance_ids, axis=0)),
      dtype=tf.float32)
  pairwise_iou = instance_segmentation_utils.points_mask_pairwise_iou(
      masks1=predicted_masks, masks2=gt_masks)
  num_classes = sampled_logits.get_shape().as_list()[1]
  sampled_class_labels_one_hot = tf.one_hot(
      indices=tf.reshape(sampled_class_labels, [-1]), depth=num_classes)
  sampled_class_labels_one_hot_fg = sampled_class_labels_one_hot[:, 1:]
  iou_coefs = tf.tile(tf.reshape(pairwise_iou, [-1, 1]), [1, num_classes - 1])
  sampled_class_labels_one_hot_fg *= iou_coefs
  sampled_class_labels_one_hot_bg = tf.maximum(
      1.0 - tf.math.reduce_sum(
          sampled_class_labels_one_hot_fg, axis=1, keepdims=True), 0.0)
  sampled_class_labels_one_hot = tf.concat(
      [sampled_class_labels_one_hot_bg, sampled_class_labels_one_hot_fg],
      axis=1)
  params = {}
  if is_balanced:
    weights = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(sampled_instance_ids, axis=1))
    params['weights'] = weights
  return classification_loss_fn(
      logits=sampled_logits,
      labels=sampled_class_labels_one_hot,
      **params)

def _expand_to_population(self, data):
  """Expand the input tensor to a population of replications.

  Args:
    data (tf.Tensor): input data with shape [batch_size, ...]

  Returns:
    data_population (tf.Tensor) with shape
    [batch_size * self._population_size, ...]. For example, for a data tensor
    [[a, b], [c, d]] and a population_size of 2, the output data_population
    tensor is [[a, b], [a, b], [c, d], [c, d]].
  """
  data_population = tf.tile(
      tf.expand_dims(data, 1),
      [1, self._population_size] + [1] * len(data.shape[1:]))
  data_population = tf.reshape(data_population,
                               [-1] + data.shape[1:].as_list())
  return data_population

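# Standalone sketch of the tile-and-reshape expansion above, reproducing the
# docstring example ([[a, b], [c, d]] with a population size of 2). The local
# `population_size` is a stand-in for self._population_size; illustrative
# only, not part of the original class.
def _expand_to_population_example():
  data = tf.constant([[1.0, 2.0], [3.0, 4.0]])
  population_size = 2
  expanded = tf.tile(
      tf.expand_dims(data, 1),
      [1, population_size] + [1] * len(data.shape[1:]))
  # Result: [[1., 2.], [1., 2.], [3., 4.], [3., 4.]].
  return tf.reshape(expanded, [-1] + data.shape[1:].as_list())
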
def identity_knn_graph_unbatched(points, k):
  """Returns each point as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of [N, D] where D is the point dimensions.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [N, k]. Distances is all zeros since
      each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [N, k]. Each row will contain values that
      are identical to the index of that row.
  """
  num_points = tf.shape(points)[0]
  indices = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(indices, [1, k])
  distances = tf.zeros([num_points, k], dtype=tf.float32)
  return distances, indices

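# Minimal usage sketch for identity_knn_graph_unbatched (assumes TensorFlow
# 2.x eager execution; illustrative only).
def _identity_knn_graph_unbatched_example():
  points = tf.random.uniform([5, 3], dtype=tf.float32)
  distances, indices = identity_knn_graph_unbatched(points, k=4)
  # distances is all zeros with shape [5, 4]; every row of indices repeats its
  # own row index, e.g. indices[2] == [2, 2, 2, 2].
  return distances, indices
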
def _concat_states(self, states, transpose=False):
  """Concatenate all pairs of states in a batch.

  Args:
    states: Tensor, batch of states from which we will concatenate
      batch_size^2 pairs of states.
    transpose: bool, whether to concatenate states in transpose order.

  Returns:
    A batch_size^2 Tensor containing the concatenation of all elements in
    `states`.
  """
  # tiled_states will have shape
  # [batch_size, batch_size, representation_dimension] and will be of the
  # following form (where \phi_1 is the representation of the state of the
  # first batch_element):
  # [ \phi_1 \phi_2 ... \phi_batch_size ]
  # [ \phi_1 \phi_2 ... \phi_batch_size ]
  # ...
  # [ \phi_1 \phi_2 ... \phi_batch_size ]
  batch_size = tf.shape(states)[0]
  tiled_states = tf.tile([states], [batch_size, 1, 1])
  # transpose_tiled_states will have shape
  # [batch_size, batch_size, representation_dimension] and will be of the
  # following form (where \phi_1 is the representation of the state of the
  # first batch_element):
  # [ \phi_1 \phi_1 ... \phi_1 ]
  # [ \phi_2 \phi_2 ... \phi_2 ]
  # ...
  # [ \phi_batch_size \phi_batch_size ... \phi_batch_size ]
  transpose_tiled_states = tf.keras.backend.repeat(states, batch_size)
  # concat_states will be a
  # [batch_size, batch_size, representation_dimension*2] matrix containing
  # the concatenation of all pairs of states in the batch.
  if transpose:
    concat_states = tf.concat([transpose_tiled_states, tiled_states], 2)
  else:
    concat_states = tf.concat([tiled_states, transpose_tiled_states], 2)
  # We return a reshaped matrix which results in a new batch of size
  # batch_size ** 2. Since the representation dimension here is 2, the
  # resulting matrix has shape [batch_size**2, 4].
  return tf.reshape(concat_states, (batch_size**2, 4))

def flip_normals_towards_viewpoint(points, normals, viewpoint):
  """Flips the normals to face towards the view point.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    normals: A tf.float32 tensor of size [N, 3].
    viewpoint: A tf.float32 tensor of size [3].

  Returns:
    flipped_normals: A tf.float32 tensor of size [N, 3].
  """
  # (viewpoint - point)
  view_vector = tf.expand_dims(viewpoint, axis=0) - points
  # Dot product between the (viewpoint - point) and the plane normal.
  cos_theta = tf.expand_dims(
      tf.reduce_sum(view_vector * normals, axis=1), axis=1)
  # Revert normals where cos is negative.
  normals *= tf.sign(tf.tile(cos_theta, [1, 3]))
  return normals

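# Minimal usage sketch for flip_normals_towards_viewpoint (illustrative only).
# With the viewpoint at the origin and a normal pointing away from it, the
# normal is flipped.
def _flip_normals_towards_viewpoint_example():
  points = tf.constant([[1.0, 0.0, 0.0]])
  normals = tf.constant([[1.0, 0.0, 0.0]])
  viewpoint = tf.constant([0.0, 0.0, 0.0])
  # view_vector = [-1, 0, 0], so cos_theta < 0 and the returned normal is
  # [[-1., 0., 0.]].
  return flip_normals_towards_viewpoint(points, normals, viewpoint)
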
def _build_op(self):
  x = self.x[:, :, 1:]
  c_x, c_y = self._split()
  r = NeuralEncoder(output_shape=None, scope="encoder")(
      c_x, c_y, keep_prob=self.keep_prob)
  r = tf.tile(r[:, None, :],
              (1, self.y_features.get_shape().as_list()[1], 1))
  h = tf.layers.dense(
      tf.concat([r, self.y_features], axis=2),
      units=128,
      activation=tf.nn.tanh)
  h = tf.layers.dense(h, units=1, activation=None)
  r = tf.reduce_mean(h, axis=1)
  self.d = NeuralDecoder(scope="decoder")(r, x, keep_prob=self.keep_prob)
  self.h = self.d.loc

def identity_knn_graph(points, num_valid_points, k):  # pylint: disable=unused-argument
  """Returns each point as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of size [num_batches, N, D] where D is the
      point dimensions.
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [num_batches, N, k]. Distances is all
      zeros since each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [num_batches, N, k]. Each row will contain
      values that are identical to the index of that row.
  """
  num_batches = points.get_shape()[0]
  num_points = tf.shape(points)[1]
  indices = tf.expand_dims(tf.range(num_points), axis=1)
  indices = tf.tile(tf.expand_dims(indices, axis=0), [num_batches, 1, k])
  distances = tf.zeros([num_batches, num_points, k], dtype=tf.float32)
  return distances, indices

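# Minimal usage sketch for the batched identity_knn_graph (assumes TensorFlow
# 2.x eager execution; illustrative only).
def _identity_knn_graph_example():
  points = tf.random.uniform([2, 6, 3], dtype=tf.float32)
  num_valid_points = tf.constant([6, 4], dtype=tf.int32)
  distances, indices = identity_knn_graph(points, num_valid_points, k=3)
  # distances is all zeros with shape [2, 6, 3]; indices[b, i] == [i, i, i]
  # for every batch b and point i.
  return distances, indices
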
def classification_loss_using_mask_iou(inputs,
                                       outputs,
                                       num_samples,
                                       max_instance_id=None,
                                       similarity_strategy='distance',
                                       is_balanced=True,
                                       is_intermediate=False):
  """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      class_labels - A tf.int32 tensor of size [batch_size, n]. It is assumed
        that the background voxels are assigned to class 0.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
      logits - A tf.float32 tensor of size [batch_size, n, num_classes]. It is
        assumed that background is class 0.
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  class_labels_key = standard_fields.InputDataFields.object_class_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
    logits_key = (
        standard_fields.DetectionResultFields
        .intermediate_object_semantic_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
    logits_key = standard_fields.DetectionResultFields.object_semantic_voxels
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if class_labels_key not in inputs:
    raise ValueError('class_labels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if logits_key not in outputs:
    raise ValueError('logits is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return classification_loss_using_mask_iou_func(
      embeddings=outputs[embedding_key],
      logits=outputs[logits_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      class_labels=inputs[class_labels_key],
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      is_balanced=is_balanced)

def prepare_lidar_images_and_correspondences(
    inputs,
    resized_image_height,
    resized_image_width,
    camera_names=('front', 'front_left', 'front_right', 'side_left',
                  'side_right'),
    lidar_names=('top', 'front', 'side_left', 'side_right', 'rear')):
  """Integrates and returns the lidars, cameras and their correspondences.

  Args:
    inputs: A dictionary containing the images and point / pixel
      correspondences.
    resized_image_height: Target height of the images.
    resized_image_width: Target width of the images.
    camera_names: List of cameras to include images from.
    lidar_names: List of lidars to include point clouds from.

  Returns:
    A tf.float32 tensor of size [num_points, 3] containing point positions.
    A tf.float32 tensor of size [num_points, 1] containing point intensities.
    A tf.float32 tensor of size [num_points, 1] containing point elongations.
    A tf.float32 tensor of size [num_points, 3] containing point normals.
    A tf.float32 tensor of size [num_images, resized_image_height,
      resized_image_width, 3].
    A tf.int32 tensor of size [num_images, num_points, 2].

  Raises:
    ValueError: If camera_names or lidar_names are empty lists.
  """
  if not camera_names:
    raise ValueError('camera_names should contain at least one name.')
  if not lidar_names:
    raise ValueError('lidar_names should contain at least one name.')

  (points_position, points_intensity, points_elongation, points_normal,
   points_in_image_frame_yx, points_in_image_frame_id) = _prepare_lidar_points(
       inputs=inputs, lidar_names=lidar_names)

  images = []
  points_in_image_frame = []

  for camera_name in camera_names:
    image_key = ('cameras/%s/image' % camera_name)
    image_height = tf.shape(inputs[image_key])[0]
    image_width = tf.shape(inputs[image_key])[1]
    height_ratio = tf.cast(
        resized_image_height, dtype=tf.float32) / tf.cast(
            image_height, dtype=tf.float32)
    width_ratio = tf.cast(
        resized_image_width, dtype=tf.float32) / tf.cast(
            image_width, dtype=tf.float32)
    if tf.executing_eagerly():
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    else:
      resize_method = tf.image.ResizeMethod.BILINEAR
      if inputs[image_key].dtype in [
          tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, tf.int64
      ]:
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    images.append(
        tf.image.resize(
            images=inputs[image_key],
            size=[resized_image_height, resized_image_width],
            method=resize_method,
            antialias=True))
    camera_id = tf.cast(inputs[('cameras/%s/id' % camera_name)],
                        dtype=tf.int32)
    valid_points = tf.equal(points_in_image_frame_id, camera_id)
    valid_points = tf.tile(valid_points, [1, 2])
    point_coords = tf.cast(
        tf.cast(points_in_image_frame_yx, dtype=tf.float32) *
        tf.stack([height_ratio, width_ratio]),
        dtype=tf.int32)
    points_in_image_frame_camera = tf.where(
        valid_points, point_coords,
        -tf.ones_like(valid_points, dtype=tf.int32))
    points_in_image_frame.append(points_in_image_frame_camera)
  num_images = len(images)
  images = tf.stack(images, axis=0)
  images.set_shape([num_images, resized_image_height, resized_image_width, 3])
  points_in_image_frame = tf.stack(points_in_image_frame, axis=0)

  return {
      'points_position': points_position,
      'points_intensity': points_intensity,
      'points_elongation': points_elongation,
      'points_normal': points_normal,
      'view_images': {'rgb_view': images},
      'view_indices_2d': {'rgb_view': points_in_image_frame}
  }

def npair_loss(inputs,
               outputs,
               num_samples,
               max_instance_id=None,
               similarity_strategy='distance',
               loss_strategy='softmax',
               is_intermediate=False):
  """N-pair metric learning loss for learning feature embeddings.

  Args:
    inputs: A dictionary that contains
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      valid_mask - A tf.bool tensor of size [batch_size, n] that is True when
        an element is valid and False if it needs to be ignored. By default
        the value is None which means it is not applied.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.
    is_intermediate: True if applied to intermediate predictions; otherwise,
      False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('object_instance_id_voxels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding key is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return npair_loss_func(
      embeddings=outputs[embedding_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      loss_strategy=loss_strategy)

def project_distribution(supports, weights, target_support,
                         validate_args=False):
  """Projects a batch of (support, weights) onto target_support.

  Based on equation (7) in (Bellemare et al., 2017):
    https://arxiv.org/abs/1707.06887
  In the rest of the comments we will refer to this equation simply as Eq7.

  This code is not easy to digest, so we will use a running example to clarify
  what is going on, with the following sample inputs:
    * supports =       [[0, 2, 4, 6, 8],
                        [1, 3, 4, 5, 6]]
    * weights =        [[0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.2, 0.5, 0.1, 0.1]]
    * target_support = [4, 5, 6, 7, 8]
  In the code below, comments preceded with 'Ex:' will be referencing the
  above values.

  Args:
    supports: Tensor of shape (batch_size, num_dims) defining supports for the
      distribution.
    weights: Tensor of shape (batch_size, num_dims) defining weights on the
      original support points. Although for the CategoricalDQN agent these
      weights are probabilities, it is not required that they are.
    target_support: Tensor of shape (num_dims) defining support of the
      projected distribution. The values must be monotonically increasing.
      Vmin and Vmax will be inferred from the first and last elements of this
      tensor, respectively. The values in this tensor must be equally spaced.
    validate_args: Whether we will verify the contents of the target_support
      parameter.

  Returns:
    A Tensor of shape (batch_size, num_dims) with the projection of a batch of
    (support, weights) onto target_support.

  Raises:
    ValueError: If target_support has no dimensions, or if shapes of supports,
      weights, and target_support are incompatible.
  """
  target_support_deltas = target_support[1:] - target_support[:-1]
  # delta_z = `\Delta z` in Eq7.
  delta_z = target_support_deltas[0]
  validate_deps = []
  supports.shape.assert_is_compatible_with(weights.shape)
  supports[0].shape.assert_is_compatible_with(target_support.shape)
  target_support.shape.assert_has_rank(1)
  if validate_args:
    # Assert that supports and weights have the same shapes.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(tf.shape(supports), tf.shape(weights))),
            [supports, weights]))
    # Assert that elements of supports and target_support have the same shape.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(
                tf.equal(tf.shape(supports)[1], tf.shape(target_support))),
            [supports, target_support]))
    # Assert that target_support has a single dimension.
    validate_deps.append(
        tf.Assert(
            tf.equal(tf.size(tf.shape(target_support)), 1), [target_support]))
    # Assert that the target_support is monotonically increasing.
    validate_deps.append(
        tf.Assert(tf.reduce_all(target_support_deltas > 0), [target_support]))
    # Assert that the values in target_support are equally spaced.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(target_support_deltas, delta_z)),
            [target_support]))

  with tf.control_dependencies(validate_deps):
    # Ex: `v_min, v_max = 4, 8`.
    v_min, v_max = target_support[0], target_support[-1]
    # Ex: `batch_size = 2`.
    batch_size = tf.shape(supports)[0]
    # `N` in Eq7.
    # Ex: `num_dims = 5`.
    num_dims = tf.shape(target_support)[0]
    # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
    # Ex: `clipped_support = [[[ 4.  4.  4.  6.  8.]]
    #                         [[ 4.  4.  4.  5.  6.]]]`.
    clipped_support = tf.clip_by_value(supports, v_min, v_max)[:, None, :]
    # Ex: `tiled_support = [[[[ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]]
    #                        [[ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]]]]`.
    tiled_support = tf.tile([clipped_support], [1, 1, num_dims, 1])
    # Ex: `reshaped_target_support = [[[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]
    #                                 [[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]]`.
    reshaped_target_support = tf.tile(target_support[:, None], [batch_size, 1])
    reshaped_target_support = tf.reshape(reshaped_target_support,
                                         [batch_size, num_dims, 1])
    # numerator = `|clipped_support - z_i|` in Eq7.
    # Ex: `numerator = [[[[ 0.  0.  0.  2.  4.]
    #                     [ 1.  1.  1.  1.  3.]
    #                     [ 2.  2.  2.  0.  2.]
    #                     [ 3.  3.  3.  1.  1.]
    #                     [ 4.  4.  4.  2.  0.]]
    #                    [[ 0.  0.  0.  1.  2.]
    #                     [ 1.  1.  1.  0.  1.]
    #                     [ 2.  2.  2.  1.  0.]
    #                     [ 3.  3.  3.  2.  1.]
    #                     [ 4.  4.  4.  3.  2.]]]]`.
    numerator = tf.abs(tiled_support - reshaped_target_support)
    quotient = 1 - (numerator / delta_z)
    # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
    # Ex: `clipped_quotient = [[[[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  1.]]
    #                           [[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  1.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]]]]`.
    clipped_quotient = tf.clip_by_value(quotient, 0, 1)
    # Ex: `weights = [[ 0.1  0.6  0.1  0.1  0.1]
    #                 [ 0.1  0.2  0.5  0.1  0.1]]`.
    weights = weights[:, None, :]
    # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))`
    # in Eq7.
    # Ex: `inner_prod = [[[[ 0.1  0.6  0.1  0.   0. ]
    #                      [ 0.   0.   0.   0.   0. ]
    #                      [ 0.   0.   0.   0.1  0. ]
    #                      [ 0.   0.   0.   0.   0. ]
    #                      [ 0.   0.   0.   0.   0.1]]
    #                     [[ 0.1  0.2  0.5  0.   0. ]
    #                      [ 0.   0.   0.   0.1  0. ]
    #                      [ 0.   0.   0.   0.   0.1]
    #                      [ 0.   0.   0.   0.   0. ]
    #                      [ 0.   0.   0.   0.   0. ]]]]`.
    inner_prod = clipped_quotient * weights
    # Ex: `projection = [[ 0.8  0.0  0.1  0.0  0.1]
    #                    [ 0.8  0.1  0.1  0.0  0.0]]`.
    projection = tf.reduce_sum(inner_prod, 3)
    projection = tf.reshape(projection, [batch_size, num_dims])
    return projection

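# Usage sketch for project_distribution with the running example from its
# docstring (illustrative only).
def _project_distribution_example():
  supports = tf.constant([[0., 2., 4., 6., 8.], [1., 3., 4., 5., 6.]])
  weights = tf.constant(
      [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.2, 0.5, 0.1, 0.1]])
  target_support = tf.constant([4., 5., 6., 7., 8.])
  # Expected projection (per the docstring example):
  #   [[0.8, 0.0, 0.1, 0.0, 0.1],
  #    [0.8, 0.1, 0.1, 0.0, 0.0]]
  return project_distribution(supports, weights, target_support)
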
def _build_train_op(self, optimizer):
  """Build the TensorFlow graph used to learn the bisimulation metric.

  Args:
    optimizer: a tf.train optimizer.

  Returns:
    A TensorFlow op to minimize the bisimulation loss.
  """
  self.online_network = tf.make_template('Online', self._network_template)
  self.target_network = tf.make_template('Target', self._network_template)
  self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s1_ph')
  self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s2_ph')
  self.s1_online_distances = self.online_network(
      self._concat_states(self.s1_ph))
  self.s1_target_distances = self.target_network(
      self._concat_states(self.s1_ph))
  self.s2_target_distances = self.target_network(
      self._concat_states(self.s2_ph))
  self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
  self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
  # We use an expanding horizon for computing the distances.
  self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
  # bisimulation_target_1 = rew_diff + gamma * next_distance.
  bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
  # bisimulation_target_2 = curr_distance.
  bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
  # We slowly taper in the maximum according to the bisim horizon.
  bisimulation_target = tf.maximum(
      bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
  # We zero-out diagonal entries, since those are estimating the distance
  # between a state and itself, which we know to be 0.
  diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
  diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
  bisimulation_target *= diagonal_mask
  bisimulation_estimate = self.s1_online_distances
  # We start with a mask that includes everything.
  loss_mask = tf.ones(tf.shape(bisimulation_estimate))
  # We have to enforce that states being compared are done only using the
  # same action.
  indicators = self.action_ph
  indicators = tf.cast(indicators, tf.float64)
  # indicators will initially have shape [batch_size], we first tile it:
  square_ids = tf.tile([indicators], [self.batch_size, 1])
  # We subtract square_ids from its transpose:
  square_ids = square_ids - tf.transpose(square_ids)
  # At this point all zero-entries are the ones with equal IDs.
  # Now we would like to convert the zeros in this matrix to 1s, and make
  # everything else a 0. We can do this with the following operation:
  loss_mask = 1 - tf.abs(tf.sign(square_ids))
  # Now reshape to match the shapes of the estimate and target.
  loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
  larger_targets = bisimulation_target - bisimulation_estimate
  larger_targets_count = tf.reduce_sum(
      tf.cast(larger_targets > 0., tf.float64))
  tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
  tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
  tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
  bisimulation_loss = tf.losses.mean_squared_error(
      bisimulation_target,
      bisimulation_estimate,
      weights=loss_mask)
  tf.summary.scalar('Learning/loss', bisimulation_loss)
  # Plot average distance between sampled representations.
  average_distance = tf.reduce_mean(bisimulation_estimate)
  tf.summary.scalar('Approx/AverageDistance', average_distance)
  return optimizer.minimize(bisimulation_loss)

def __call__(self, shape=None, dtype=None, partition_info=None):
  out = tf.matrix_solve_ls(
      self.K + self.reg * tf.eye(self.K.get_shape()[-1].value), self.d)
  out = tf.tile(out, (1, 1, shape[1]))
  return tf.reshape(out, shape)