def compute_vote_loss(est_data, gt_data):
    """ Compute vote loss: Match predicted votes to GT votes.

    Args:
        est_data: dict (read-only); reads 'seed_xyz', 'vote_xyz' and
            'seed_inds'.
        gt_data: dict (read-only); reads 'vote_label_mask' and 'vote_label'.

    Returns:
        vote_loss: scalar Tensor

    Overall idea:
        If the seed point belongs to an object (votes_label_mask == 1),
        then we require it to vote for the object center.

        Each seed point may vote for multiple translations v1,v2,v3
        A seed point may also be in the boxes of multiple objects:
        o1,o2,o3 with corresponding GT votes c1,c2,c3

        Then the loss for this seed point is:
            min(d(v_i,c_j)) for i=1,2,3 and j=1,2,3
    """
    # Load ground truth votes and assign them to seed points
    seed_xyz = est_data['seed_xyz']  # B,num_seed,3
    batch_size = seed_xyz.shape[0]
    num_seed = seed_xyz.shape[1]
    vote_xyz = est_data['vote_xyz']  # B,num_seed*vote_factor,3
    seed_inds = est_data['seed_inds'].long()  # B,num_seed in [0,num_points-1]

    # Get groundtruth votes for the seed points
    # vote_label_mask: Use gather to select B,num_seed from B,num_point
    #   non-object point has no GT vote mask = 0, object point has mask = 1
    # vote_label: Use gather to select B,num_seed,3*GT_VOTE_FACTOR from
    #   B,num_point,3*GT_VOTE_FACTOR with inds in shape
    #   B,num_seed,3*GT_VOTE_FACTOR
    seed_gt_votes_mask = torch.gather(gt_data['vote_label_mask'], 1, seed_inds)
    seed_inds_expand = seed_inds.view(batch_size, num_seed, 1).repeat(
        1, 1, 3 * GT_VOTE_FACTOR)
    seed_gt_votes = torch.gather(gt_data['vote_label'], 1, seed_inds_expand)
    # Add the seed position to every one of the GT_VOTE_FACTOR stored vectors.
    # The tiling count follows GT_VOTE_FACTOR (instead of a literal 3) so it
    # always matches the width of seed_inds_expand above.
    seed_gt_votes += seed_xyz.repeat(1, 1, GT_VOTE_FACTOR)

    # Compute the min of min of distance
    # from B,num_seed*vote_factor,3 to B*num_seed,vote_factor,3
    vote_xyz_reshape = vote_xyz.view(batch_size * num_seed, -1, 3)
    # from B,num_seed,3*GT_VOTE_FACTOR to B*num_seed,GT_VOTE_FACTOR,3
    seed_gt_votes_reshape = seed_gt_votes.view(
        batch_size * num_seed, GT_VOTE_FACTOR, 3)

    # A predicted vote to no where is not penalized as long as there is a
    # good vote near the GT vote.
    # NOTE(review): nn_distance is defined outside this block; dist2 is
    # presumably, per GT vote, the distance to its closest predicted vote --
    # confirm against its implementation.
    _, _, dist2, _ = nn_distance(
        vote_xyz_reshape, seed_gt_votes_reshape, l1=True)
    votes_dist, _ = torch.min(dist2, dim=1)  # reduce dim 1 -> (B*num_seed,)
    votes_dist = votes_dist.view(batch_size, num_seed)

    # Average only over seeds whose mask is set; the epsilon guards against
    # a batch in which no seed has a GT vote.
    seed_mask = seed_gt_votes_mask.float()
    vote_loss = torch.sum(votes_dist * seed_mask) / (
        torch.sum(seed_mask) + 1e-6)
    return vote_loss
def compute_objectness_loss_boxnet(est_data, gt_data):
    """ Compute objectness loss for the proposals, labelling each proposal
    with the GT vote mask of the seed it was aggregated from.

    Args:
        est_data: dict; reads 'aggregated_vote_xyz', 'seed_inds',
            'aggregated_vote_inds' and 'objectness_scores'.
            NOTE: this dict is modified -- the gathered seed mask is stored
            under est_data['seed_labels'].
        gt_data: dict (read-only); reads 'center_label' and
            'vote_label_mask'.

    Returns:
        objectness_loss: scalar Tensor
        objectness_label: Tensor gathered from the seed labels with
            'aggregated_vote_inds' (one entry per proposal)
        objectness_mask: float Tensor of ones with the same two leading
            dimensions as objectness_label (no ignore zone)
        object_assignment: index Tensor returned by nn_distance for the
            proposals (original comment: (B,K) with values in 0..K2-1)
    """
    # Associate proposal and GT objects by point-to-point distances.
    # Only the proposal -> GT index is needed here.
    aggregated_vote_xyz = est_data['aggregated_vote_xyz']
    gt_center = gt_data['center_label'][:, :, 0:3]
    _, ind1, _, _ = nn_distance(aggregated_vote_xyz, gt_center)

    # Generate objectness label and mask
    # NOTE: Different from VoteNet, here we use seed label as objectness label.
    seed_inds = est_data['seed_inds'].long()  # B,num_seed in [0,num_points-1]
    seed_gt_votes_mask = torch.gather(gt_data['vote_label_mask'], 1, seed_inds)
    est_data['seed_labels'] = seed_gt_votes_mask
    aggregated_vote_inds = est_data['aggregated_vote_inds']
    # select (B,K) from (B,num_seed)
    objectness_label = torch.gather(
        est_data['seed_labels'], 1, aggregated_vote_inds.long())

    # Compute objectness loss
    objectness_scores = est_data['objectness_scores']
    # Allocate helper tensors next to the scores instead of forcing the
    # current CUDA device, so the loss follows wherever the model runs.
    device = objectness_scores.device
    objectness_mask = torch.ones(
        (objectness_label.shape[0], objectness_label.shape[1]),
        device=device)  # no ignore zone anymore
    class_weights = torch.tensor(
        OBJECTNESS_CLS_WEIGHTS, dtype=torch.float32, device=device)
    criterion = nn.CrossEntropyLoss(class_weights, reduction='none')
    # CrossEntropyLoss expects the class dimension at index 1.
    objectness_loss = criterion(
        objectness_scores.transpose(2, 1), objectness_label)
    objectness_loss = torch.sum(objectness_loss * objectness_mask) / (
        torch.sum(objectness_mask) + 1e-6)

    # Set assignment
    object_assignment = ind1  # (B,K) with values in 0,1,...,K2-1

    return objectness_loss, objectness_label, objectness_mask, object_assignment
def compute_objectness_loss(est_data, gt_data):
    """ Compute objectness loss for the proposals.

    Args:
        est_data: dict (read-only); reads 'aggregated_vote_xyz' and
            'objectness_scores'.
        gt_data: dict (read-only); reads 'center_label'.

    Returns:
        objectness_loss: scalar Tensor
        objectness_label: (B, K) long Tensor with value 0 or 1
        objectness_mask: (B, K) float Tensor with value 0 or 1
        object_assignment: index Tensor returned by nn_distance for the
            proposals (original comment: (B,K) with values in 0..K2-1)

        where B = gt_data['center_label'].shape[0] and
        K = est_data['aggregated_vote_xyz'].shape[1].
    """
    # Associate proposal and GT objects by point-to-point distances
    aggregated_vote_xyz = est_data['aggregated_vote_xyz']
    gt_center = gt_data['center_label'][:, :, 0:3]
    B = gt_center.shape[0]
    K = aggregated_vote_xyz.shape[1]
    dist1, ind1, _, _ = nn_distance(aggregated_vote_xyz, gt_center)  # dist1: BxK

    # Generate objectness label and mask
    # objectness_label: 1 if pred object center is within NEAR_THRESHOLD of any GT object
    # objectness_mask: 0 if pred object center is in gray zone (DONOTCARE), 1 otherwise
    # The square root with a small epsilon suggests dist1 is a squared
    # distance -- TODO confirm against nn_distance.
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    is_near = euclidean_dist1 < NEAR_THRESHOLD
    is_far = euclidean_dist1 > FAR_THRESHOLD

    # Allocate on the device of the distances rather than on whatever the
    # current CUDA device happens to be.
    device = euclidean_dist1.device
    objectness_label = torch.zeros((B, K), dtype=torch.long, device=device)
    objectness_mask = torch.zeros((B, K), device=device)
    objectness_label[is_near] = 1
    objectness_mask[is_near] = 1
    objectness_mask[is_far] = 1

    # Compute objectness loss
    objectness_scores = est_data['objectness_scores']
    # The scores are transposed so that dim 1 holds the per-class logits.
    objectness_loss = objectness_criterion(
        objectness_scores.transpose(2, 1), objectness_label)
    objectness_loss = torch.sum(objectness_loss * objectness_mask) / (
        torch.sum(objectness_mask) + 1e-6)

    # Set assignment
    object_assignment = ind1  # (B,K) with values in 0,1,...,K2-1

    return objectness_loss, objectness_label, objectness_mask, object_assignment
def compute_box_and_sem_cls_loss(est_data, gt_data, meta_data, config):
    """ Compute 3D bounding box and semantic classification loss.

    Args:
        est_data: dict (read-only); reads 'center', 'heading_scores',
            'heading_residuals_normalized', 'size_scores',
            'size_residuals_normalized' and 'sem_cls_scores'.
        gt_data: dict (read-only); reads 'center_label', 'box_label_mask',
            'heading_class_label', 'heading_residual_label',
            'size_class_label', 'size_residual_label' and 'sem_cls_label'.
        meta_data: dict (read-only); reads 'object_assignment' and
            'objectness_label'.
        config: object providing num_heading_bin, num_size_cluster and
            mean_size_arr (a numpy array, since .astype is called on it).

    Returns:
        center_loss
        heading_cls_loss
        heading_reg_loss
        size_cls_loss
        size_reg_loss
        sem_cls_loss
    """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    mean_size_arr = config.mean_size_arr

    object_assignment = meta_data['object_assignment']
    batch_size = object_assignment.shape[0]
    # Every GT tensor is gathered with object_assignment, so helper tensors
    # are created on its device instead of a hard-coded CUDA device.
    device = object_assignment.device

    objectness_label = meta_data['objectness_label'].float()
    # Shared normaliser for all per-proposal terms; the epsilon guards against
    # a batch without any positive proposal.
    num_positive = torch.sum(objectness_label) + 1e-6

    # Compute center loss
    pred_center = est_data['center']
    gt_center = gt_data['center_label'][:, :, 0:3]
    dist1, _, dist2, _ = nn_distance(
        pred_center, gt_center)  # dist1: BxK, dist2: BxK2
    box_label_mask = gt_data['box_label_mask']
    centroid_reg_loss1 = torch.sum(dist1 * objectness_label) / num_positive
    centroid_reg_loss2 = \
        torch.sum(dist2 * box_label_mask) / (torch.sum(box_label_mask) + 1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = torch.gather(
        gt_data['heading_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_class_loss = criterion_heading_class(
        est_data['heading_scores'].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(
        heading_class_loss * objectness_label) / num_positive

    heading_residual_label = torch.gather(
        gt_data['heading_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)

    # One-hot encode the GT heading bin so that only the residual predicted
    # for that bin contributes to the regression loss.
    heading_label_one_hot = torch.zeros(
        batch_size, heading_class_label.shape[1], num_heading_bin,
        dtype=torch.float32, device=device)
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    heading_residual_normalized_loss = huber_loss(
        torch.sum(
            est_data['heading_residuals_normalized'] * heading_label_one_hot,
            -1) - heading_residual_normalized_label,
        delta=1.0)  # (B,K)
    heading_residual_normalized_loss = torch.sum(
        heading_residual_normalized_loss * objectness_label) / num_positive

    # Compute size loss
    size_class_label = torch.gather(
        gt_data['size_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    size_class_loss = criterion_size_class(
        est_data['size_scores'].transpose(2, 1), size_class_label)  # (B,K)
    size_class_loss = torch.sum(
        size_class_loss * objectness_label) / num_positive

    size_residual_label = torch.gather(
        gt_data['size_residual_label'], 1,
        object_assignment.unsqueeze(-1).repeat(
            1, 1, 3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.zeros(
        batch_size, size_class_label.shape[1], num_size_cluster,
        dtype=torch.float32, device=device)
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        est_data['size_residuals_normalized'] * size_label_one_hot_tiled,
        2)  # (B,K,3)

    mean_size_arr_expanded = torch.from_numpy(
        mean_size_arr.astype(np.float32)).to(device).unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    # Mean size of the GT size cluster of each proposal, used to normalise
    # the GT residual before comparing it with the prediction.
    mean_size_label = torch.sum(
        size_label_one_hot_tiled * mean_size_arr_expanded, 2)  # (B,K,3)
    size_residual_label_normalized = \
        size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized, delta=1.0),
        -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss * objectness_label) / num_positive

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(
        gt_data['sem_cls_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    sem_cls_loss = criterion_sem_cls(
        est_data['sem_cls_scores'].transpose(2, 1), sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(sem_cls_loss * objectness_label) / num_positive

    return (center_loss, heading_class_loss, heading_residual_normalized_loss,
            size_class_loss, size_residual_normalized_loss, sem_cls_loss)
def get_proposal_id(self, end_points, data, mode='random', batch_sample_ids=None, DUMP_CONF_THRESH=-1.):
    '''
    Get the proposal ids for completion training for the limited GPU RAM.

    For every scene, each proposal center is matched through nn_distance to one of the GT boxes
    flagged in data['box_label_mask']; a subset of the proposals is then kept.

    :param end_points: estimated data from votenet; reads 'center' and, when mode is 'objectness'
        or batch_sample_ids is given, 'objectness_scores'.
    :param data: data source which contains gt contents; reads 'box_label_mask', 'center_label'
        and 'sem_cls_label'.
    :param mode: selection rule used when batch_sample_ids is None.
        'random': draw completion_limit_in_train proposals without replacement;
        'nn': keep the proposals with the smallest dist1 returned by nn_distance;
        'objectness': take the best-scoring proposal of every matched GT box first, then the
        remaining proposals in descending objectness order.
    :param batch_sample_ids: optional per-scene selector which replaces the mode-based sampling;
        it is multiplied element-wise with the thresholded objectness mask
        (presumably a 0/1 mask over proposals -- TODO confirm with the caller).
    :param DUMP_CONF_THRESH: objectness probability threshold; only used with batch_sample_ids.
    :return: per-scene rows of (proposal id, GT box id, GT class id) for the kept proposals,
        concatenated along a new leading batch dimension (so every scene has to keep the same
        number of rows).
    :raises NameError: if batch_sample_ids is None and mode is not one of the values above.
    '''
    batch_size, _ = data['box_label_mask'].shape
    centers = end_points['center']
    device = centers.device
    num_proposals = centers.size(1)
    keep_per_scene = self.cfg.config['data']['completion_limit_in_train']
    use_given_ids = batch_sample_ids is not None

    if mode == 'objectness' or use_given_ids:
        # Probability of channel 1 of the objectness scores.
        objectness_probs = torch.softmax(end_points['objectness_scores'], dim=2)[..., 1]

    per_scene_rows = []
    for batch_id in range(batch_size):
        # Indices of the GT slots whose box_label_mask entry is non-zero.
        valid_box_ids = torch.nonzero(data['box_label_mask'][batch_id])
        gt_centroids = data['center_label'][batch_id, valid_box_ids, 0:3].squeeze(1)
        dist_to_gt, nearest_gt, _, _ = nn_distance(
            centers[batch_id].unsqueeze(0), gt_centroids.unsqueeze(0))
        # Translate positions inside the filtered GT list back to GT slot ids.
        gt_box_ids = valid_box_ids[nearest_gt[0]].squeeze(-1)
        gt_classes = data['sem_cls_label'][batch_id][gt_box_ids]

        # Columns: proposal id, assigned GT box id, class of that GT box.
        proposal_ids = torch.arange(0, num_proposals).unsqueeze(-1).to(device).long()
        proposal_table = torch.cat(
            [proposal_ids, gt_box_ids.unsqueeze(-1), gt_classes.long().unsqueeze(-1)], dim=-1)

        if use_given_ids:
            confident = (objectness_probs[batch_id] > DUMP_CONF_THRESH).cpu().numpy()
            sample_ids = confident * batch_sample_ids[batch_id]
        elif mode == 'random':
            uniform_weights = torch.ones(size=(num_proposals, ))
            sample_ids = torch.multinomial(uniform_weights, keep_per_scene, replacement=False)
        elif mode == 'nn':
            sample_ids = torch.argsort(dist_to_gt[0])[:keep_per_scene]
        elif mode == 'objectness':
            ranked = torch.argsort(objectness_probs[batch_id], descending=True)
            ranked_gt_ids = proposal_table[ranked, 1].cpu().numpy()
            # Rank position of the first (i.e. best-scoring) proposal of every GT box.
            first_hits = np.unique(ranked_gt_ids, return_index=True)[1]
            remaining = np.setdiff1d(range(len(ranked)), first_hits, assume_unique=True)
            picked_ranks = np.hstack([first_hits, remaining])[:keep_per_scene]
            sample_ids = ranked[picked_ranks]
        else:
            raise NameError('Please specify a correct filtering mode.')

        per_scene_rows.append(proposal_table[sample_ids].long().unsqueeze(0))

    return torch.cat(per_scene_rows, dim=0)