def compute_vote_loss(end_points, supervised_inds):
    """ Compute vote loss: Match predicted votes to GT votes.

    Supervised (labeled) variant: only the batch rows selected by
    ``supervised_inds`` contribute to the loss.

    Args:
        end_points: dict (read-only)
        supervised_inds: 1-D index tensor selecting the labeled rows of
            the batch.  # assumes it selects the *leading* rows — see NOTE below
    Returns:
        vote_loss: scalar Tensor
    Overall idea:
        If the seed point belongs to an object (votes_label_mask == 1),
        then we require it to vote for the object center.
        Each seed point may vote for multiple translations v1,v2,v3
        A seed point may also be in the boxes of multiple objects: o1,o2,o3
        with corresponding GT votes c1,c2,c3
        Then the loss for this seed point is:
            min(d(v_i,c_j)) for i=1,2,3 and j=1,2,3
    """
    batch_size_supervised = supervised_inds.shape[0]

    # Load ground truth votes and assign them to seed points
    seed_xyz = end_points['seed_xyz'][supervised_inds, ...]  # B_l, num_seed, 3
    num_seed = seed_xyz.shape[1]
    vote_xyz = end_points['vote_xyz'][supervised_inds,
                                      ...]  # B_l,num_seed*vote_factor,3
    seed_inds = end_points['seed_inds'][
        supervised_inds, ...].long()  # B_l,num_seed in [0,num_points-1]

    # NOTE(review): vote_label_mask / vote_label are NOT indexed by
    # supervised_inds here, yet they are gathered with B_l-row indices.
    # torch.gather permits index tensors smaller than the input in
    # non-gather dims, so this silently reads the *first* B_l batch rows —
    # correct only if the supervised samples occupy the leading batch
    # positions.  TODO: confirm against the data loader / trainer.
    vote_label_mask = end_points['vote_label_mask']  # B, num_point
    vote_label = end_points['vote_label']  # B, num_point, 9

    seed_gt_votes_mask = torch.gather(vote_label_mask, 1,
                                      seed_inds)  # B_l,num_seed
    seed_inds_expand = seed_inds.view(batch_size_supervised, num_seed,
                                      1).expand(-1, -1, 3 * GT_VOTE_FACTOR)
    seed_gt_votes = torch.gather(vote_label, 1,
                                 seed_inds_expand)  # B_l,num_seed,9
    # vote_label presumably stores offsets relative to the seed point;
    # adding seed_xyz turns them into absolute GT vote coordinates —
    # TODO confirm (VoteNet convention).
    seed_gt_votes += seed_xyz.repeat(1, 1, 3)

    vote_xyz_reshape = vote_xyz.view(
        batch_size_supervised * num_seed, -1,
        3)  # from B_l,num_seed*vote_factor,3 to B_l*num_seed,vote_factor,3
    seed_gt_votes_reshape = seed_gt_votes.view(
        batch_size_supervised * num_seed, GT_VOTE_FACTOR, 3
    )  # from B_l,num_seed,3*GT_VOTE_FACTOR to B_l*num_seed,GT_VOTE_FACTOR,3
    # A predicted vote to no where is not penalized as long as there is a
    # good vote near the GT vote.
    dist1, _, dist2, _ = nn_distance(vote_xyz_reshape,
                                     seed_gt_votes_reshape,
                                     l1=True)
    # For each seed, keep only the best (smallest) GT-side distance.
    votes_dist, _ = torch.min(
        dist2, dim=1)  # (B_l*num_seed,vote_factor) to (B_l*num_seed,)
    votes_dist = votes_dist.view(batch_size_supervised, num_seed)
    # Average only over seeds that lie inside a GT object (mask == 1);
    # 1e-6 guards against an all-zero mask.
    vote_loss = torch.sum(votes_dist * seed_gt_votes_mask.float()) / (
        torch.sum(seed_gt_votes_mask.float()) + 1e-6)
    return vote_loss
def compute_vote_loss(end_points):
    """ Compute vote loss: Match predicted votes to GT votes.

    Full-batch variant (every sample is treated as labeled).
    NOTE(review): this redefines ``compute_vote_loss`` from above — at
    import time this later definition shadows the supervised one; verify
    which one callers intend to use.

    Args:
        end_points: dict (read-only)
    Returns:
        vote_loss: scalar Tensor
    Overall idea:
        If the seed point belongs to an object (votes_label_mask == 1),
        then we require it to vote for the object center.
        Each seed point may vote for multiple translations v1,v2,v3
        A seed point may also be in the boxes of multiple objects: o1,o2,o3
        with corresponding GT votes c1,c2,c3
        Then the loss for this seed point is:
            min(d(v_i,c_j)) for i=1,2,3 and j=1,2,3
    """
    # Load ground truth votes and assign them to seed points
    batch_size = end_points['seed_xyz'].shape[0]
    num_seed = end_points['seed_xyz'].shape[1]  # B,num_seed,3
    vote_xyz = end_points['vote_xyz']  # B,num_seed*vote_factor,3
    seed_inds = end_points['seed_inds'].long(
    )  # B,num_seed in [0,num_points-1]

    # Get groundtruth votes for the seed points
    # vote_label_mask: Use gather to select B,num_seed from B,num_point
    #   non-object point has no GT vote mask = 0, object point has mask = 1
    # vote_label: Use gather to select B,num_seed,9 from B,num_point,9
    #   with inds in shape B,num_seed,9 and 9 = GT_VOTE_FACTOR * 3
    seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1,
                                      seed_inds)
    seed_inds_expand = seed_inds.view(batch_size, num_seed,
                                      1).repeat(1, 1, 3 * GT_VOTE_FACTOR)
    seed_gt_votes = torch.gather(end_points['vote_label'], 1,
                                 seed_inds_expand)
    # Convert per-seed offsets to absolute coordinates (presumably the
    # label stores seed-relative offsets — TODO confirm).
    seed_gt_votes += end_points['seed_xyz'].repeat(1, 1, 3)

    # Compute the min of min of distance
    vote_xyz_reshape = vote_xyz.view(
        batch_size * num_seed, -1,
        3)  # from B,num_seed*vote_factor,3 to B*num_seed,vote_factor,3
    seed_gt_votes_reshape = seed_gt_votes.view(
        batch_size * num_seed, GT_VOTE_FACTOR,
        3)  # from B,num_seed,3*GT_VOTE_FACTOR to B*num_seed,GT_VOTE_FACTOR,3
    # A predicted vote to no where is not penalized as long as there is a
    # good vote near the GT vote.
    dist1, _, dist2, _ = nn_distance(vote_xyz_reshape,
                                     seed_gt_votes_reshape,
                                     l1=True)
    votes_dist, _ = torch.min(dist2,
                              dim=1)  # (B*num_seed,vote_factor) to (B*num_seed,)
    votes_dist = votes_dist.view(batch_size, num_seed)
    # Masked average over object seeds only; 1e-6 avoids divide-by-zero.
    vote_loss = torch.sum(votes_dist * seed_gt_votes_mask.float()) / (
        torch.sum(seed_gt_votes_mask.float()) + 1e-6)
    return vote_loss
def compute_objectness_loss(end_points, supervised_inds):
    """ Compute objectness loss for the proposals (supervised rows only).

    Args:
        end_points: dict (read-only)
        supervised_inds: 1-D index tensor selecting the labeled batch rows.
    Returns:
        objectness_loss: scalar Tensor
        objectness_label: (batch_size, num_seed) Tensor with value 0 or 1
        objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1
        object_assignment: (batch_size, num_seed) Tensor with long int
            within [0,num_gt_object-1]
    """
    # Associate proposal and GT objects by point-to-point distances
    aggregated_vote_xyz = end_points['aggregated_vote_xyz'][supervised_inds,
                                                            ...]
    # Advanced indexing copies, so the -1000 write below does not touch
    # end_points['center_label'] itself.
    gt_center = end_points['center_label'][supervised_inds, ...][:, :, 0:3]
    B = gt_center.shape[0]  # B_l
    K = aggregated_vote_xyz.shape[1]
    # in case the placeholders get associated: push padded (non-GT) boxes
    # far away so nn_distance never matches a proposal to them.
    gt_mask = (1 - end_points['box_label_mask'][supervised_inds,
                                                ...]).unsqueeze(-1).expand(
                                                    -1, -1, 3).bool()
    gt_center[gt_mask] = -1000
    dist1, ind1, dist2, _ = nn_distance(
        aggregated_vote_xyz, gt_center)  # dist1: B_l xK, dist2: B_l xK2

    # Generate objectness label and mask
    # objectness_label: 1 if pred object center is within NEAR_THRESHOLD of
    #   any GT object
    # objectness_mask: 0 if pred object center is in gray zone (DONOTCARE),
    #   1 otherwise
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    objectness_label = torch.zeros((B, K), dtype=torch.long).cuda()
    objectness_mask = torch.zeros((B, K)).cuda()
    objectness_label[euclidean_dist1 < NEAR_THRESHOLD] = 1
    objectness_mask[euclidean_dist1 < NEAR_THRESHOLD] = 1
    objectness_mask[euclidean_dist1 > FAR_THRESHOLD] = 1

    # Compute objectness loss (class-weighted CE, averaged over the
    # non-DONOTCARE proposals only).
    objectness_scores = end_points['objectness_scores'][supervised_inds, ...]
    criterion = nn.CrossEntropyLoss(
        torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none')
    objectness_loss = criterion(objectness_scores.transpose(2, 1),
                                objectness_label)
    objectness_loss = torch.sum(objectness_loss * objectness_mask) / (
        torch.sum(objectness_mask) + 1e-6)

    # Set assignment
    object_assignment = ind1  # (B_l ,K) with values in 0,1,...,K2-1
    return objectness_loss, objectness_label, objectness_mask, object_assignment
def compute_objectness_loss(data_dict): """ Compute objectness loss for the proposals. Args: data_dict: dict (read-only) Returns: objectness_loss: scalar Tensor objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 object_assignment: (batch_size, num_seed) Tensor with long int within [0,num_gt_object-1] """ # Associate proposal and GT objects by point-to-point distances aggregated_vote_xyz = data_dict['aggregated_vote_xyz'] gt_center = data_dict['center_label'][:, :, 0:3] B = gt_center.shape[0] K = aggregated_vote_xyz.shape[1] K2 = gt_center.shape[1] dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 # Generate objectness label and mask # objectness_label: 1 if pred object center is within NEAR_THRESHOLD of any GT object # objectness_mask: 0 if pred object center is in gray zone (DONOTCARE), 1 otherwise euclidean_dist1 = torch.sqrt(dist1 + 1e-6) objectness_label = torch.zeros((B, K), dtype=torch.long).cuda() objectness_mask = torch.zeros((B, K)).cuda() objectness_label[euclidean_dist1 < NEAR_THRESHOLD] = 1 objectness_mask[euclidean_dist1 < NEAR_THRESHOLD] = 1 objectness_mask[euclidean_dist1 > FAR_THRESHOLD] = 1 # Compute objectness loss objectness_scores = data_dict['objectness_scores'] criterion = nn.CrossEntropyLoss( torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') objectness_loss = criterion(objectness_scores.transpose(2, 1), objectness_label) objectness_loss = torch.sum(objectness_loss * objectness_mask) / ( torch.sum(objectness_mask) + 1e-6) # Set assignment object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 return objectness_loss, objectness_label, objectness_mask, object_assignment
def compute_iou_labels(end_points,
                       unsupervised_inds,
                       pred_votes,
                       pred_center,
                       pred_sem_cls,
                       pred_objectness,
                       pred_heading_scores,
                       pred_heading_residuals,
                       pred_size_scores,
                       pred_size_residuals,
                       config_dict,
                       reverse=False):
    """Compute per-proposal IoU labels between decoded predicted boxes and
    GT boxes for the rows selected by ``unsupervised_inds``.

    Side effect: writes end_points['pred_bbox'].
    Returns:
        reverse=True:  iou_labels (B, gt_num, pred_num)
        reverse=False: (iou_labels (B, pred_num),
                        objectness_label (B, pred_num),
                        object_assignment (B, pred_num))
    # pred_sem_cls / pred_objectness are accepted but unused here.
    """
    # the end_points labels are not transformed
    center_label = end_points['center_label'][unsupervised_inds, ...]
    # Push padded GT slots far away so they can't win a distance match.
    zero_mask = (1 - end_points['box_label_mask'][unsupervised_inds,
                                                  ...]).unsqueeze(-1).expand(
                                                      -1, -1, 3).bool()
    center_label[zero_mask] = -1000
    heading_class_label = end_points['heading_class_label'][unsupervised_inds,
                                                            ...]
    heading_residual_label = end_points['heading_residual_label'][
        unsupervised_inds, ...]
    size_class_label = end_points['size_class_label'][unsupervised_inds, ...]
    size_residual_label = end_points['size_residual_label'][unsupervised_inds,
                                                            ...]

    # Decode predicted heading/size from the argmax bin and its residual.
    pred_heading_class = torch.argmax(pred_heading_scores, -1)
    pred_heading_residual = torch.gather(
        pred_heading_residuals, 2,
        pred_heading_class.unsqueeze(-1)).squeeze(2)
    pred_size_class = torch.argmax(pred_size_scores, -1)
    pred_size_class_inds = pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
        1, 1, 1, 3)
    pred_size_residual = torch.gather(pred_size_residuals, 2,
                                      pred_size_class_inds).squeeze(
                                          2)  # B, num_proposals, 3

    # NOTE(review): this object_assignment is overwritten in the
    # reverse=False branch below; only objectness_label survives from here.
    dist1, object_assignment, _, _ = nn_distance(pred_votes, center_label)
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    batch_size, num_proposal = euclidean_dist1.shape[:2]
    objectness_label = torch.zeros((batch_size, num_proposal),
                                   dtype=torch.long).cuda()
    objectness_label[euclidean_dist1 < NEAR_THRESHOLD] = 1

    # ------------------------- GT BBOX ----------------------------------------
    gt_size = config_dict['dataset_config'].class2size_gpu(
        size_class_label, size_residual_label)
    gt_angle = config_dict['dataset_config'].class2angle_gpu(
        heading_class_label, heading_residual_label)
    gt_bbox = torch.cat([center_label, gt_size, -gt_angle[:, :, None]], dim=2)
    pred_size = config_dict['dataset_config'].class2size_gpu(
        pred_size_class.detach(), pred_size_residual)
    # Clamp non-positive sizes so IoU stays well-defined.
    pred_size[pred_size <= 0] = 1e-6
    if config_dict['dataset_config'].num_heading_bin == 1:
        # Single heading bin => dataset is axis-aligned; use zero angle.
        pred_angle = torch.zeros(pred_size.shape[:2]).cuda()
    else:
        pred_angle = config_dict['dataset_config'].class2angle_gpu(
            pred_heading_class.detach(), pred_heading_residual)
    pred_bbox = torch.cat([pred_center, pred_size, -pred_angle[:, :, None]],
                          axis=2)
    end_points['pred_bbox'] = pred_bbox
    pred_num = pred_bbox.shape[1]
    gt_num = gt_bbox.shape[1]

    # start = time.time()
    # The GPU IoU kernel takes flat (N, 7) box lists and returns an
    # all-pairs result across the whole flattened batch; the gathers with
    # `inds` below pick out only the same-sample pairs.
    gt_bbox_ = gt_bbox.view(-1, 7)
    pred_bbox_ = pred_bbox.view(-1, 7)
    if reverse:
        iou_labels = box3d_iou_batch_gpu(gt_bbox_, pred_bbox_)
        iou_labels = iou_labels.view(batch_size * gt_num, batch_size, -1)
        inds = torch.arange(batch_size).cuda().unsqueeze(1).expand(
            -1, gt_num * pred_num).contiguous().view(-1, 1, pred_num)
        iou_labels = iou_labels.gather(dim=1, index=inds).view(
            batch_size, -1, pred_num)
        iou_labels = iou_labels.detach()
        return iou_labels
    else:
        iou_labels = box3d_iou_batch_gpu(pred_bbox_, gt_bbox_)
        # For each proposal keep the best-overlapping GT box (and its index).
        iou_labels, object_assignment = iou_labels.view(
            batch_size * pred_num, batch_size, -1).max(dim=2)
        inds = torch.arange(batch_size).cuda().unsqueeze(1).expand(
            -1, pred_num).contiguous().view(-1, 1)
        iou_labels = iou_labels.gather(dim=1, index=inds).view(batch_size, -1)
        iou_labels = iou_labels.detach()
        object_assignment = object_assignment.gather(dim=1, index=inds).view(
            batch_size, -1)
        return iou_labels, objectness_label, object_assignment
def compute_iou_labels_axis_aligned_gpu(end_points, unsupervised_inds,
                                        pred_votes, pred_center, pred_sem_cls,
                                        pred_objectness, pred_heading_scores,
                                        pred_heading_residuals,
                                        pred_size_scores, pred_size_residuals,
                                        config_dict):
    """Axis-aligned variant of compute_iou_labels: boxes are represented by
    their max/min corners (no heading) and matched via max IoU per proposal.

    Side effects: writes end_points['acc_pred_iou'] and
    end_points['acc_pred_iou_obj'].
    Returns:
        iou_labels (B, pred_num), iou_zero_mask (B, pred_num) int,
        final_object_assignment (B, pred_num)
    # pred_sem_cls / pred_objectness / heading args are accepted but unused.
    """
    center_label = end_points['center_label'][unsupervised_inds, ...]
    # Push padded GT slots far away so they can't be matched.
    zero_mask = (1 - end_points['box_label_mask'][unsupervised_inds,
                                                  ...]).unsqueeze(-1).expand(
                                                      -1, -1, 3).bool()
    center_label[zero_mask] = -1000
    size_class_label = end_points['size_class_label'][unsupervised_inds, ...]
    size_residual_label = end_points['size_residual_label'][unsupervised_inds,
                                                            ...]
    origin_object_assignment = end_points['object_assignment'][
        unsupervised_inds, ...]

    dist1, object_assignment, _, _ = nn_distance(pred_votes, center_label)
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    batch_size, K = euclidean_dist1.shape[:2]
    objectness_label = torch.zeros((batch_size, K), dtype=torch.long).cuda()
    objectness_label[euclidean_dist1 < NEAR_THRESHOLD] = 1

    # Decode predicted size from argmax class + gathered residual.
    pred_size_class = torch.argmax(pred_size_scores, -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        pred_size_residuals, 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3).detach())  # B,num_proposal,1,3
    pred_size_residual = pred_size_residual.squeeze(2)

    # Half-extents: corners are center +/- size/2, stacked as (max, min).
    gt_size = config_dict['dataset_config'].class2size_gpu(
        size_class_label, size_residual_label) / 2
    gt_corners = torch.cat([(gt_size + center_label).unsqueeze(2),
                            (center_label - gt_size).unsqueeze(2)],
                           dim=2)
    pred_size = config_dict['dataset_config'].class2size_gpu(
        pred_size_class.detach(), pred_size_residual) / 2
    pred_corners = torch.cat([(pred_size + pred_center).unsqueeze(2),
                              (pred_center - pred_size).unsqueeze(2)],
                             dim=2)

    # Tile to all (pred, gt) pairs within each sample.
    batch_size, pred_num = pred_corners.shape[:2]
    gt_num = gt_corners.shape[1]
    pred_corners_expand_tile = pred_corners.unsqueeze(2).expand(
        -1, -1, gt_num, -1, -1).contiguous().view(batch_size, -1, 2, 3)
    gt_corners_expand_tile = gt_corners.unsqueeze(1).expand(
        -1, pred_num, -1, -1, -1).contiguous().view(batch_size, -1, 2, 3)
    iou_labels = box3d_iou_gpu_axis_aligned(gt_corners_expand_tile.detach(),
                                            pred_corners_expand_tile)
    # Best-overlapping GT per proposal.
    iou_labels, object_assignment = iou_labels.view(batch_size, pred_num,
                                                    gt_num).max(2)
    # Where IoU is ~0 the max-IoU assignment is meaningless; fall back to
    # the pre-computed assignment from end_points.
    iou_zero_mask = (iou_labels < 1e-4).int()
    final_object_assignment = origin_object_assignment * iou_zero_mask + object_assignment * (
        1 - iou_zero_mask)
    end_points['acc_pred_iou'] = torch.sum(iou_labels) / iou_labels.view(
        -1).shape[0]
    end_points['acc_pred_iou_obj'] = torch.sum(
        iou_labels * objectness_label) / (torch.sum(objectness_label) + 1e-6)
    return iou_labels, iou_zero_mask, final_object_assignment
def compute_box_and_sem_cls_loss(end_points, supervised_inds, dataset_config,
                                 config_dict):
    """ Compute 3D bounding box and semantic classification loss
    (supervised rows only).

    Args:
        end_points: dict — read for labels/predictions, but also written:
            cls_acc, pred_iou_value, pred_iou_obj_value, obj_count, and
            (conditionally) jitter_iou_* / iou_* diagnostics and losses.
        supervised_inds: 1-D index tensor selecting the labeled batch rows.
        dataset_config: dataset meta (bins, clusters, mean sizes, decoders).
        config_dict: unused here except implicitly via compute_iou_labels.
    Returns:
        center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss,
        size_reg_loss, sem_cls_loss
    """
    num_heading_bin = dataset_config.num_heading_bin
    num_size_cluster = dataset_config.num_size_cluster
    mean_size_arr = dataset_config.mean_size_arr

    # NOTE(review): object_assignment and objectness_label are taken from
    # end_points WITHOUT [supervised_inds, ...], while every other tensor
    # below is row-selected.  This is consistent only if these entries were
    # already computed on the supervised subset — verify against the caller.
    object_assignment = end_points['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss (symmetric chamfer-style: pred->GT and GT->pred).
    dist1, ind1, dist2, _ = nn_distance(
        end_points['center'][supervised_inds, ...],
        end_points['center_label'][supervised_inds,
                                   ...][:, :, 0:3])  # dist1: BxK, dist2: BxK2
    box_label_mask = end_points['box_label_mask'][supervised_inds, ...]
    objectness_label = end_points['objectness_label'].float()
    centroid_reg_loss1 = \
        torch.sum(dist1 * objectness_label) / (torch.sum(objectness_label) + 1e-6)
    centroid_reg_loss2 = \
        torch.sum(dist2 * box_label_mask) / (torch.sum(box_label_mask) + 1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = torch.gather(
        end_points['heading_class_label'][supervised_inds, ...], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    heading_class_loss = criterion_heading_class(
        end_points['heading_scores'][supervised_inds, ...].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(heading_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    heading_residual_label = torch.gather(
        end_points['heading_residual_label'][supervised_inds, ...], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)
    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = torch.cuda.FloatTensor(
        batch_size, heading_class_label.shape[1], num_heading_bin).zero_()
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    # Only the residual of the GT heading bin is penalized (one-hot select).
    heading_residual_normalized_loss = huber_loss(torch.sum(
        end_points['heading_residuals_normalized'][supervised_inds, ...] *
        heading_label_one_hot, -1) - heading_residual_normalized_label,
                                                  delta=1.0)  # (B,K)
    heading_residual_normalized_loss = torch.sum(
        heading_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute size loss
    size_class_label = torch.gather(
        end_points['size_class_label'][supervised_inds, ...], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_size_class = nn.CrossEntropyLoss(reduction='none')
    size_class_loss = criterion_size_class(
        end_points['size_scores'][supervised_inds, ...].transpose(2, 1),
        size_class_label)  # (B,K)
    size_class_loss = torch.sum(size_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    size_residual_label = torch.gather(
        end_points['size_residual_label'][supervised_inds, ...], 1,
        object_assignment.unsqueeze(-1).repeat(
            1, 1, 3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                size_class_label.shape[1],
                                                num_size_cluster).zero_()
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        end_points['size_residuals_normalized'][supervised_inds, ...] *
        size_label_one_hot_tiled, 2)  # (B,K,3)
    mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(
        np.float32)).cuda().unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    mean_size_label = torch.sum(size_label_one_hot_tiled *
                                mean_size_arr_expanded, 2)  # (B,K,3)
    # Residual is normalized by the cluster's mean size before regression.
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized,
                   delta=1.0), -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(end_points['sem_cls_label'][supervised_inds,
                                                             ...], 1,
                                 object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(
        end_points['sem_cls_scores'][supervised_inds, ...].transpose(2, 1),
        sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(
        sem_cls_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    end_points['cls_acc'] = torch.sum(
        (sem_cls_label == end_points['sem_cls_scores'][supervised_inds,
                                                       ...].argmax(dim=-1)) *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # IoU labels between decoded predicted boxes and GT boxes (diagnostics
    # plus the assignment used by the optional iou_scores branch below).
    iou_labels, _, iou_assignment = compute_iou_labels(
        # aggregated_vote_xyz -> center
        end_points,
        supervised_inds,
        end_points['aggregated_vote_xyz'][supervised_inds, ...],
        end_points['center'][supervised_inds, ...],
        None,
        None,
        end_points['heading_scores'][supervised_inds, ...],
        end_points['heading_residuals'][supervised_inds, ...],
        end_points['size_scores'][supervised_inds, ...],
        end_points['size_residuals'][supervised_inds, ...],
        config_dict={'dataset_config': dataset_config})
    end_points['pred_iou_value'] = torch.sum(iou_labels) / iou_labels.view(
        -1).shape[0]
    end_points['pred_iou_obj_value'] = torch.sum(
        iou_labels * objectness_label) / (torch.sum(objectness_label) + 1e-6)
    end_points['obj_count'] = torch.sum(objectness_label)

    # Optional branch: supervise the jittered-box IoU predictor when the
    # model produced jittered proposals.
    if 'jitter_center' in end_points.keys():
        jitter_center = end_points['jitter_center'][supervised_inds, ...]
        jitter_size = end_points['jitter_size'][supervised_inds, ...]
        jitter_heading = end_points['jitter_heading'][supervised_inds, ...]
        # All jittered boxes are treated as objects.
        jitter_objectness_label = torch.ones(batch_size,
                                             jitter_heading.shape[1]).cuda()
        center_label = end_points['center_label'][supervised_inds, ...]
        zero_mask = (1 - end_points['box_label_mask'][supervised_inds, ...]
                     ).unsqueeze(-1).expand(-1, -1, 3).bool()
        center_label[zero_mask] = -1000
        heading_class_label = end_points['heading_class_label'][
            supervised_inds, ...]
        heading_residual_label = end_points['heading_residual_label'][
            supervised_inds, ...]
        size_class_label = end_points['size_class_label'][supervised_inds, ...]
        size_residual_label = end_points['size_residual_label'][
            supervised_inds, ...]
        gt_size = dataset_config.class2size_gpu(size_class_label,
                                                size_residual_label) / 2
        gt_angle = dataset_config.class2angle_gpu(heading_class_label,
                                                  heading_residual_label)
        gt_bbox = torch.cat([center_label, gt_size * 2, -gt_angle[:, :, None]],
                            dim=2)
        pred_bbox = torch.cat(
            [jitter_center, jitter_size, -jitter_heading[:, :, None]], axis=2)
        pred_num = pred_bbox.shape[1]
        gt_bbox_ = gt_bbox.view(-1, 7)
        pred_bbox_ = pred_bbox.view(-1, 7)
        # All-pairs IoU over the flattened batch; gather keeps only
        # same-sample (pred, gt) pairs.
        jitter_iou_labels = box3d_iou_batch_gpu(pred_bbox_, gt_bbox_)
        jitter_iou_labels, jitter_object_assignment = jitter_iou_labels.view(
            batch_size * pred_num, batch_size, -1).max(dim=2)
        inds = torch.arange(batch_size).cuda().unsqueeze(1).expand(
            -1, pred_num).contiguous().view(-1, 1)
        jitter_iou_labels = jitter_iou_labels.gather(dim=1, index=inds).view(
            batch_size, -1)
        jitter_iou_labels = jitter_iou_labels.detach()
        jitter_object_assignment = jitter_object_assignment.gather(
            dim=1, index=inds).view(batch_size, -1)
        jitter_sem_class_label = torch.gather(
            end_points['sem_cls_label'][supervised_inds, ...], 1,
            jitter_object_assignment)  # select (B,K) from (B,K2)
        jitter_iou_pred = nn.Sigmoid()(
            end_points['iou_scores_jitter'][supervised_inds, ...])
        if jitter_iou_pred.shape[2] > 1:  # gt sem cls
            jitter_iou_pred = torch.gather(
                jitter_iou_pred, 2,
                jitter_sem_class_label.unsqueeze(-1)).squeeze(
                    -1)  # use pred semantic labels
        else:
            jitter_iou_pred = jitter_iou_pred.squeeze(-1)
        # 'acc' here is mean absolute error (lower is better), not accuracy.
        jitter_iou_acc = torch.abs(jitter_iou_pred - jitter_iou_labels)
        end_points['jitter_iou_acc'] = torch.sum(
            jitter_iou_acc) / jitter_iou_acc.view(-1).shape[0]
        end_points['jitter_iou_acc_obj'] = torch.sum(
            jitter_iou_acc * jitter_objectness_label) / (
                torch.sum(jitter_objectness_label) + 1e-6)
        jitter_iou_loss = huber_loss(jitter_iou_pred -
                                     jitter_iou_labels.detach(),
                                     delta=1.0)
        jitter_iou_loss = torch.sum(
            jitter_iou_loss * jitter_objectness_label) / (
                torch.sum(jitter_objectness_label) + 1e-6)
        end_points['jitter_iou_loss'] = jitter_iou_loss

    # Optional branch: supervise the per-proposal IoU predictor.
    if 'iou_scores' in end_points.keys():
        iou_pred = nn.Sigmoid()(end_points['iou_scores'][supervised_inds, ...])
        if iou_pred.shape[2] > 1:  # gt sem cls
            iou_sem_cls_label = torch.gather(
                end_points['sem_cls_label'][supervised_inds, ...], 1,
                iou_assignment)
            iou_pred = torch.gather(iou_pred, 2,
                                    iou_sem_cls_label.unsqueeze(-1)).squeeze(
                                        -1)  # use pred semantic labels
        else:
            iou_pred = iou_pred.squeeze(-1)
        iou_acc = torch.abs(iou_pred - iou_labels)
        # NOTE(review): torch.sum(torch.ones(shape)) is just iou_acc.numel(),
        # computed on CPU; works, but a roundabout way to take the mean.
        end_points['iou_acc'] = torch.sum(iou_acc) / torch.sum(
            torch.ones(iou_acc.shape))
        end_points['iou_acc_obj'] = torch.sum(
            iou_acc * objectness_label) / (torch.sum(objectness_label) + 1e-6)
        iou_loss = huber_loss(iou_pred - iou_labels.detach(),
                              delta=1.0)  # (B, K, 1)
        iou_loss = iou_loss.mean()
        end_points['iou_loss'] = iou_loss

    return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss
def compute_box_and_sem_cls_loss(data_dict, config):
    """ Compute 3D bounding box and semantic classification loss
    (plain full-batch variant, no IoU/jitter bookkeeping).

    NOTE(review): one of three same-named definitions in this file; the
    last one defined wins at import time — verify intended dispatch.

    Args:
        data_dict: dict (read-only)
        config: dataset meta (bins, clusters, mean sizes).
    Returns:
        center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss,
        size_reg_loss, sem_cls_loss
    """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    num_class = config.num_class  # unused here
    mean_size_arr = config.mean_size_arr

    object_assignment = data_dict['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss (both directions: pred->GT and GT->pred).
    pred_center = data_dict['center']
    gt_center = data_dict['center_label'][:, :, 0:3]
    dist1, ind1, dist2, _ = nn_distance(pred_center,
                                        gt_center)  # dist1: BxK, dist2: BxK2
    box_label_mask = data_dict['box_label_mask']
    objectness_label = data_dict['objectness_label'].float()
    centroid_reg_loss1 = \
        torch.sum(dist1*objectness_label)/(torch.sum(objectness_label)+1e-6)
    centroid_reg_loss2 = \
        torch.sum(dist2*box_label_mask)/(torch.sum(box_label_mask)+1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = torch.gather(
        data_dict['heading_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    heading_class_loss = criterion_heading_class(
        data_dict['heading_scores'].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(heading_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    heading_residual_label = torch.gather(
        data_dict['heading_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)
    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = torch.cuda.FloatTensor(
        batch_size, heading_class_label.shape[1], num_heading_bin).zero_()
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    # Only the residual of the GT heading bin contributes (one-hot select).
    heading_residual_normalized_loss = huber_loss(torch.sum(
        data_dict['heading_residuals_normalized'] * heading_label_one_hot,
        -1) - heading_residual_normalized_label,
                                                  delta=1.0)  # (B,K)
    heading_residual_normalized_loss = torch.sum(
        heading_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute size loss
    size_class_label = torch.gather(
        data_dict['size_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_size_class = nn.CrossEntropyLoss(reduction='none')
    size_class_loss = criterion_size_class(data_dict['size_scores'].transpose(
        2, 1), size_class_label)  # (B,K)
    size_class_loss = torch.sum(size_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    size_residual_label = torch.gather(data_dict['size_residual_label'], 1,
                                       object_assignment.unsqueeze(-1).repeat(
                                           1, 1,
                                           3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                size_class_label.shape[1],
                                                num_size_cluster).zero_()
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        data_dict['size_residuals_normalized'] * size_label_one_hot_tiled,
        2)  # (B,K,3)
    mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(
        np.float32)).cuda().unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    mean_size_label = torch.sum(size_label_one_hot_tiled *
                                mean_size_arr_expanded, 2)  # (B,K,3)
    # Residual normalized by its cluster's mean size before regression.
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized,
                   delta=1.0), -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(data_dict['sem_cls_label'], 1,
                                 object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(data_dict['sem_cls_scores'].transpose(
        2, 1), sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(
        sem_cls_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss
def compute_box_and_sem_cls_loss(end_points, config, test_time=False):
    """ Compute 3D bounding box and semantic classification loss
    (full-batch variant with IoU diagnostics and optional IoU-head loss).

    NOTE(review): third definition of this name in the file; this one
    shadows the previous two at import time.  ``test_time`` is accepted
    but never used in this body.

    Args:
        end_points: dict — read for labels/predictions, and written:
            cls_acc, cls_acc_obj, iou_labels, pred_iou_value,
            pred_iou_obj_value, and (conditionally) iou_acc, iou_acc_obj,
            iou_loss.
        config: dataset meta (bins, clusters, mean sizes, decoders).
    Returns:
        center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss,
        size_reg_loss, sem_cls_loss
    """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    num_class = config.num_class  # unused here
    mean_size_arr = config.mean_size_arr

    object_assignment = end_points['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss (both directions: pred->GT and GT->pred).
    pred_center = end_points['center']
    gt_center = end_points['center_label'][:, :, 0:3]
    dist1, ind1, dist2, _ = nn_distance(pred_center,
                                        gt_center)  # dist1: BxK, dist2: BxK2
    box_label_mask = end_points['box_label_mask']
    objectness_label = end_points['objectness_label'].float()
    centroid_reg_loss1 = \
        torch.sum(dist1*objectness_label)/(torch.sum(objectness_label)+1e-6)
    centroid_reg_loss2 = \
        torch.sum(dist2*box_label_mask)/(torch.sum(box_label_mask)+1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = torch.gather(
        end_points['heading_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    heading_class_loss = criterion_heading_class(
        end_points['heading_scores'].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(heading_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    heading_residual_label = torch.gather(
        end_points['heading_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)
    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = torch.cuda.FloatTensor(
        batch_size, heading_class_label.shape[1], num_heading_bin).zero_()
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    # Only the residual of the GT heading bin contributes (one-hot select).
    heading_residual_normalized_loss = huber_loss(torch.sum(
        end_points['heading_residuals_normalized'] * heading_label_one_hot,
        -1) - heading_residual_normalized_label,
                                                  delta=1.0)  # (B,K)
    heading_residual_normalized_loss = torch.sum(
        heading_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute size loss
    size_class_label = torch.gather(
        end_points['size_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_size_class = nn.CrossEntropyLoss(reduction='none')
    size_class_loss = criterion_size_class(end_points['size_scores'].transpose(
        2, 1), size_class_label)  # (B,K)
    size_class_loss = torch.sum(size_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    size_residual_label = torch.gather(end_points['size_residual_label'], 1,
                                       object_assignment.unsqueeze(-1).repeat(
                                           1, 1,
                                           3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                size_class_label.shape[1],
                                                num_size_cluster).zero_()
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        end_points['size_residuals_normalized'] * size_label_one_hot_tiled,
        2)  # (B,K,3)
    mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(
        np.float32)).cuda().unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    mean_size_label = torch.sum(size_label_one_hot_tiled *
                                mean_size_arr_expanded, 2)  # (B,K,3)
    # Residual normalized by its cluster's mean size before regression.
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized,
                   delta=1.0), -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(end_points['sem_cls_label'], 1,
                                 object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(end_points['sem_cls_scores'].transpose(
        2, 1), sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(
        sem_cls_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Classification accuracy over all proposals and over objects only.
    end_points['cls_acc'] = torch.sum(
        (sem_cls_label == end_points['sem_cls_scores'].argmax(
            dim=-1))).float() / sem_cls_label.view(-1).shape[0]
    end_points['cls_acc_obj'] = torch.sum(
        (sem_cls_label == end_points['sem_cls_scores'].argmax(dim=-1)) *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # end_points['center'].retain_grad()
    # `mask` is just arange(batch_size): select every row (full batch).
    mask = torch.arange(batch_size).cuda()
    iou_labels, iou_zero_mask, _ = compute_iou_labels(
        end_points, mask, end_points['aggregated_vote_xyz'],
        end_points['center'], None, None, end_points['heading_scores'],
        end_points['heading_residuals'], end_points['size_scores'],
        end_points['size_residuals'], {'dataset_config': config})
    end_points['iou_labels'] = iou_labels
    end_points['pred_iou_value'] = torch.sum(iou_labels) / iou_labels.view(
        -1).shape[0]
    end_points['pred_iou_obj_value'] = torch.sum(
        iou_labels * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Optional IoU-prediction head supervision.
    if 'iou_scores' in end_points.keys():
        iou_pred = nn.Sigmoid()(end_points['iou_scores'])
        if iou_pred.shape[2] > 1:
            iou_pred = torch.gather(
                iou_pred, 2,
                end_points['sem_cls_scores'].argmax(
                    dim=-1).unsqueeze(-1)).squeeze(
                        -1)  # use pred semantic labels
        else:
            iou_pred = iou_pred.squeeze(-1)
        # 'acc' is mean absolute error (lower is better), not accuracy.
        iou_acc = torch.abs(iou_pred - iou_labels)
        end_points['iou_acc'] = torch.sum(iou_acc) / torch.sum(
            torch.ones(iou_acc.shape))
        end_points['iou_acc_obj'] = torch.sum(
            iou_acc * objectness_label) / (torch.sum(objectness_label) + 1e-6)
        # NOTE(review): unlike the supervised variant above, iou_labels is
        # not .detach()-ed here before the loss — confirm whether gradients
        # through the labels are intended.
        iou_loss = huber_loss(iou_pred - iou_labels, delta=1.0)  # (B, K, 1)
        iou_loss = torch.sum(
            iou_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)
        end_points['iou_loss'] = iou_loss

    return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss
def compute_objectness_gt(end_points, unsupervised_inds):
    """Compute cheating objectness loss for proposals using the true GT labels.

    NOTE(review): despite the original "read-only" note, this function DOES
    write into ``end_points``: it overwrites ``end_points['center_label']``
    for the selected batch rows (placeholder boxes pushed to -1000) and
    stores ``end_points['true_unlabeled_obj_acc']``.

    Args:
        end_points: dict of network outputs and labels.
        unsupervised_inds: index tensor selecting which batch rows to use.
    Returns:
        objectness_loss: scalar Tensor.
        objectness_label: (batch_size, num_proposal) Tensor with value 0 or 1.
        objectness_mask: (batch_size, num_proposal) Tensor with value 0 or 1.
        object_assignment: (batch_size, num_proposal) long Tensor with values
            in [0, num_gt_object - 1].
    """
    # Associate proposals and GT objects by point-to-point distance.
    proposal_xyz = end_points['aggregated_vote_xyz'][unsupervised_inds, ...]
    gt_center = end_points['center_label'][unsupervised_inds, ...][:, :, 0:3]
    batch_size = gt_center.shape[0]
    num_proposal = proposal_xyz.shape[1]

    # Push the centers of padded (non-GT) placeholder boxes far away so they
    # can never be matched, then write the result back into end_points.
    placeholder = (1 - end_points['box_label_mask'][unsupervised_inds, ...])
    placeholder = placeholder.unsqueeze(-1).expand(-1, -1, 3).bool()
    gt_center[placeholder] = -1000
    end_points['center_label'][unsupervised_inds, ...] = gt_center

    # dist1: (B_l, K) squared distance of each proposal to its nearest GT center.
    dist1, ind1, dist2, _ = nn_distance(proposal_xyz, gt_center)

    # objectness_label: 1 iff the proposal center lies within NEAR_THRESHOLD
    # of some GT center. objectness_mask: 0 only in the gray (DONOTCARE) zone
    # between NEAR_THRESHOLD and FAR_THRESHOLD, 1 otherwise.
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    objectness_label = torch.zeros((batch_size, num_proposal),
                                   dtype=torch.long).cuda()
    objectness_mask = torch.zeros((batch_size, num_proposal)).cuda()
    near = euclidean_dist1 < NEAR_THRESHOLD
    objectness_label[near] = 1
    objectness_mask[near] = 1
    objectness_mask[euclidean_dist1 > FAR_THRESHOLD] = 1

    # Class-weighted cross entropy, averaged over the non-gray proposals.
    objectness_scores = end_points['objectness_scores'][unsupervised_inds, ...]
    criterion = nn.CrossEntropyLoss(
        torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none')
    per_proposal_loss = criterion(objectness_scores.transpose(2, 1),
                                  objectness_label)
    mask_sum = (torch.sum(objectness_mask) + 1e-6)
    objectness_loss = torch.sum(per_proposal_loss * objectness_mask) / mask_sum

    # (batch_size, num_proposal) with values in 0, 1, ..., K2-1.
    object_assignment = ind1

    # Objectness classification accuracy on the non-gray proposals.
    obj_pred_val = torch.argmax(
        end_points['objectness_scores'][unsupervised_inds, ...], 2)  # B,K
    obj_acc = torch.sum((obj_pred_val == objectness_label.long()).float() *
                        objectness_mask) / mask_sum
    end_points['true_unlabeled_obj_acc'] = obj_acc  # this is true obj_acc

    return objectness_loss, objectness_label, objectness_mask, object_assignment
def compute_objectness_loss(end_points, unsupervised_inds, config_dict):
    """Compute objectness loss for the proposals against unlabeled pseudo-GT.

    NOTE(review): ``gt_center`` below is obtained by basic slicing, which in
    PyTorch returns a *view* — so masking placeholder boxes to -1000 mutates
    ``end_points['unlabeled_center_label']`` in place. Presumably intentional
    (mirrors the GT variant); confirm with callers.

    Args:
        end_points: dict of network outputs and (pseudo) labels.
        unsupervised_inds: index tensor selecting which batch rows to use.
        config_dict: dict; 'samecls_match' toggles class-consistent matching.
    Returns:
        objectness_loss: scalar Tensor.
        objectness_label: (batch_size, num_proposal) Tensor with value 0 or 1.
        objectness_mask: (batch_size, num_proposal) Tensor with value 0 or 1.
        object_assignment: (batch_size, num_proposal) long Tensor with values
            in [0, num_gt_object - 1].
    """
    # Associate proposals and pseudo-GT objects by point-to-point distance.
    proposal_xyz = end_points['aggregated_vote_xyz'][unsupervised_inds, ...]
    gt_center = end_points['unlabeled_center_label'][:, :, 0:3]
    batch_size = gt_center.shape[0]  # B_l
    num_proposal = proposal_xyz.shape[1]

    # Push the centers of padded (non-GT) placeholder boxes far away so they
    # can never be matched (in-place write — see NOTE above).
    placeholder = (1 - end_points['unlabeled_box_label_mask'])
    placeholder = placeholder.unsqueeze(-1).expand(-1, -1, 3).bool()
    gt_center[placeholder] = -1000

    # Class-consistent matching may lose 15%-20% obj=1 proposals (estimate).
    if config_dict['samecls_match']:
        dist1, ind1, dist2, ind2 = nn_distance_withcls(
            proposal_xyz, gt_center,
            torch.argmax(end_points['sem_cls_scores'][unsupervised_inds, ...],
                         dim=2),
            end_points['unlabeled_sem_cls_label'])
    else:
        # dist1: (B_l, K), dist2: (B_l, K2)
        dist1, ind1, dist2, ind2 = nn_distance(proposal_xyz, gt_center)

    # objectness_label: 1 iff the proposal center lies within NEAR_THRESHOLD
    # of some pseudo-GT center. objectness_mask: 0 only in the gray
    # (DONOTCARE) zone between NEAR_THRESHOLD and FAR_THRESHOLD, 1 otherwise.
    euclidean_dist1 = torch.sqrt(dist1 + 1e-6)
    objectness_label = torch.zeros((batch_size, num_proposal),
                                   dtype=torch.long).cuda()
    objectness_mask = torch.zeros((batch_size, num_proposal)).cuda()
    near = euclidean_dist1 < NEAR_THRESHOLD
    objectness_label[near] = 1
    objectness_mask[near] = 1
    objectness_mask[euclidean_dist1 > FAR_THRESHOLD] = 1

    # Class-weighted cross entropy, averaged over the non-gray proposals.
    objectness_scores = end_points['objectness_scores'][unsupervised_inds, ...]
    criterion = nn.CrossEntropyLoss(
        torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none')
    per_proposal_loss = criterion(objectness_scores.transpose(2, 1),
                                  objectness_label)
    objectness_loss = torch.sum(per_proposal_loss * objectness_mask) / (
        torch.sum(objectness_mask) + 1e-6)

    # (batch_size, num_proposal) with values in 0, 1, ..., K2-1.
    # only use these for cheating experiments
    object_assignment = ind1

    return objectness_loss, objectness_label, objectness_mask, object_assignment