def get_output_boxes(self):
    """Decode the encoded predictions into boxes and attach a confidence.

    Reads the encoded box predictions from ``self.end_points``, decodes them
    with ``self.box_encoder.tf_decode`` and offsets the decoded center and
    angle by the values stored in ``self.placeholders['proposal_boxes']``.
    The results are written back into ``self.end_points`` under
    ``box_center``, ``box_angle``, ``box_size``, ``box_corners`` and
    ``box_score``.

    Returns:
        The corner tensor produced by ``get_box3d_corners_helper``.
    """
    encoded_keys = (
        'center_x_scores',
        'center_x_residuals_normalized',
        'center_z_scores',
        'center_z_residuals_normalized',
        'center_y_residuals',
        'heading_scores',
        'heading_residuals_normalized',
        'size_scores',
        'size_residuals_normalized',
    )
    # The decoder is fed every tensor with an extra axis inserted at
    # position 1; that axis is squeezed away again right after decoding.
    decoder_inputs = {
        key: tf.expand_dims(self.end_points[key], axis=1)
        for key in encoded_keys
    }
    centers, angles, sizes = self.box_encoder.tf_decode(decoder_inputs)

    proposals = self.placeholders['proposal_boxes']
    # Columns 0-2 of a proposal are added to the decoded center.
    abs_center = tf.squeeze(centers, axis=1) + tf.slice(
        proposals, [0, 0], [-1, 3])
    # Column 6 of a proposal is added to restore the absolute angle.
    abs_angle = tf.squeeze(angles, axis=1) + tf.gather(proposals, 6, axis=-1)
    abs_size = tf.squeeze(sizes, axis=1)

    self.end_points['box_center'] = abs_center
    self.end_points['box_angle'] = abs_angle
    self.end_points['box_size'] = abs_size

    box_corners = get_box3d_corners_helper(abs_center, abs_angle, abs_size)
    self.end_points['box_corners'] = box_corners

    # Box score: product of the top softmax probability of each
    # classification output.
    score_keys = ('cls_logits', 'center_x_scores', 'center_z_scores',
                  'heading_scores', 'size_scores')
    top_probs = [
        tf.reduce_max(tf.nn.softmax(self.end_points[key]), axis=-1)
        for key in score_keys
    ]
    box_score = top_probs[0]
    for prob in top_probs[1:]:
        box_score = box_score * prob
    self.end_points['box_score'] = box_score
    return box_corners
def tf_project_to_image_space(boxes, calib, image_shape):
    """Project 3D boxes into image space and return their 2D extents.

    The eight 3D corners of every box are projected with that box's
    calibration matrix; the axis-aligned min/max of the projected corners
    forms the 2D box.

    Args:
        boxes: a tensor of boxes in the shape [B, 7]. Columns 0-2 are used as
            the center, columns 3-5 as the size and column 6 as the angle
            (documented by the original author as [x, y, z, l, h, w, ry]).
        calib: a tensor of shape [B, 3, 4] holding one camera projection
            (p2) matrix per box. A single un-batched [3, 4] matrix is NOT
            accepted: the matrix is expanded at axis 1 and tiled over the
            eight corners, which requires the leading batch axis.
        image_shape: indexable of length 2 holding the image dimensions
            [h, w].

    Returns:
        box_corners: tensor [B, 4] of image-space corners [x1, y1, x2, y2].
        box_corners_norm: tensor [B, 4], the same corners divided by
            [w, h, w, h], i.e. expressed as a fraction of the image size.
    """
    box_center = tf.slice(boxes, [0, 0], [-1, 3])
    box_size = tf.slice(boxes, [0, 3], [-1, 3])
    box_angle = tf.slice(boxes, [0, 6], [-1, 1])
    corners_3d = get_box3d_corners_helper(
        box_center, tf.gather(box_angle, 0, axis=-1), box_size)  # (B,8,3)

    # Build the homogeneous coordinate from the corner tensor itself rather
    # than from the static batch size, so an unknown (None) batch dimension
    # is handled as well.
    homogeneous_ones = tf.ones_like(
        tf.slice(corners_3d, [0, 0, 0], [-1, -1, 1]))  # (B,8,1)
    corners_3d_hom = tf.concat(
        [corners_3d, homogeneous_ones], axis=-1)  # (B,8,4)
    corners_3d_hom = tf.expand_dims(corners_3d_hom, axis=-1)  # (B,8,4,1)

    calib_tiled = tf.tile(
        tf.expand_dims(calib, 1), [1, 8, 1, 1])  # (B,8,3,4)
    projected_pts = tf.matmul(calib_tiled, corners_3d_hom)  # (B,8,3,1)
    projected_pts = tf.squeeze(projected_pts, axis=-1)  # (B,8,3)
    # Perspective divide by the third (depth) component.
    projected_pts_norm = projected_pts / tf.slice(
        projected_pts, [0, 0, 2], [-1, -1, 1])
    corners_2d = tf.gather(projected_pts_norm, [0, 1], axis=-1)  # (B,8,2)

    pts_2d_min = tf.reduce_min(corners_2d, axis=1)  # (B,2)
    pts_2d_max = tf.reduce_max(corners_2d, axis=1)  # (B,2)
    box_corners = tf.stack([
        tf.gather(pts_2d_min, 0, axis=1),
        tf.gather(pts_2d_min, 1, axis=1),
        tf.gather(pts_2d_max, 0, axis=1),
        tf.gather(pts_2d_max, 1, axis=1),
    ], axis=1)  # (B,4)

    # Normalize: x coordinates by the width, y coordinates by the height.
    # The (4,) scale broadcasts against (B,4), so no per-batch tiling is
    # needed.
    image_shape_h = image_shape[0]
    image_shape_w = image_shape[1]
    image_scale = tf.to_float(tf.stack(
        [image_shape_w, image_shape_h, image_shape_w, image_shape_h]))
    box_corners_norm = box_corners / image_scale

    return box_corners, box_corners_norm
def get_corner_loss(self, preds, gts):
    """Huber loss on the mean corner distance between predicted and GT boxes.

    The distance is measured against both the ground-truth box and the same
    box with its heading shifted by pi; the smaller of the two per-box mean
    corner distances is used.

    Args:
        preds: tuple ``(center, heading, size)`` of the predicted boxes.
        gts: tuple ``(center, heading, size)`` of the ground-truth boxes.

    Returns:
        A pair ``(corners_loss, corners_3d_gt)``: the value returned by
        ``huber_loss`` and the (un-flipped) ground-truth corners.
    """
    gt_center, gt_heading, gt_size = gts
    pred_center, pred_heading, pred_size = preds

    gt_corners = get_box3d_corners_helper(gt_center, gt_heading, gt_size)
    gt_corners_flipped = get_box3d_corners_helper(
        gt_center, gt_heading + np.pi, gt_size)
    pred_corners = get_box3d_corners_helper(
        pred_center, pred_heading, pred_size)

    # L2 norm over the last (coordinate) axis, then mean over the corners.
    dist_direct = torch.norm(pred_corners - gt_corners, 2, dim=-1).mean(-1)
    dist_flipped = torch.norm(
        pred_corners - gt_corners_flipped, 2, dim=-1).mean(-1)
    closest_dist = torch.min(dist_direct, dist_flipped)

    return huber_loss(closest_dist, delta=1.0), gt_corners
def parse_output_to_tensors(self, output, end_points):
    """Split the raw network output into named tensors stored in end_points.

    The last axis of ``output`` is consumed left to right as: center-x
    scores, center-x residuals, center-z scores, center-z residuals, one
    center-y residual, heading scores, heading residuals, size scores and
    size residuals. The decoded boxes are then converted to corners and
    stored under ``'proposal_boxes'``.

    Input:
        output: TF tensor in shape
            (B,N,NUM_CENTER_BIN*2*2+1+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
        end_points: dict; must already contain ``'fg_points_xyz'``, which is
            added to the decoded centers.
    Output:
        end_points: dict (updated in place and returned)
    """
    static_shape = output.get_shape()
    batch_size = static_shape[0].value
    npoints = static_shape[1].value

    # (key, channel count) in the order the channels are laid out.
    channel_layout = [
        ('center_x_scores', NUM_CENTER_BIN),
        ('center_x_residuals_normalized', NUM_CENTER_BIN),
        ('center_z_scores', NUM_CENTER_BIN),
        ('center_z_residuals_normalized', NUM_CENTER_BIN),
        ('center_y_residuals', 1),
        ('heading_scores', NUM_HEADING_BIN),
        ('heading_residuals_normalized', NUM_HEADING_BIN),
        ('size_scores', NUM_SIZE_CLUSTER),
        ('size_residuals_normalized', NUM_SIZE_CLUSTER * 3),
    ]
    offset = 0
    for key, width in channel_layout:
        end_points[key] = tf.slice(output, [0, 0, offset], [-1, -1, width])
        offset += width

    # Size residuals carry three values per size cluster.
    end_points['size_residuals_normalized'] = tf.reshape(
        end_points['size_residuals_normalized'],
        [batch_size, npoints, NUM_SIZE_CLUSTER, 3])

    centers, angles, sizes = self.box_encoder.tf_decode(end_points)
    centers = centers + end_points['fg_points_xyz']

    # Flatten batch and point axes for the corner helper, then restore them.
    total_boxes = batch_size * npoints
    corners = get_box3d_corners_helper(
        tf.reshape(centers, [total_boxes, 3]),
        tf.reshape(angles, [total_boxes]),
        tf.reshape(sizes, [total_boxes, 3]))
    end_points['proposal_boxes'] = tf.reshape(
        corners, [batch_size, npoints, 8, 3])
    return end_points
def forward(self, data_dicts):
    """Run the detector; return predictions (no labels) or losses/metrics.

    Args:
        data_dicts: dict of input tensors. Keys read here: 'point_cloud',
            'one_hot', 'ref_label', 'box3d_center', 'size_class',
            'box3d_size', 'box3d_heading' and 'center_ref1' .. 'center_ref4'.
            The shape comments below are the example sizes recorded by the
            original author (batch of 32), not requirements.

    Returns:
        When 'box3d_center' is absent (test mode / RGB detections): a tuple
        ``(cls_probs, center_preds, heading_preds, size_preds)`` reshaped to
        have the batch as leading axis.
        Otherwise: a pair ``(losses, metrics)`` of dicts.

    Raises:
        AssertionError: in training mode, if ``ref_label`` has no entry
            equal to 1.
    """
    point_cloud = data_dicts.get('point_cloud')  # e.g. [32, 4, 1024]
    one_hot = data_dicts.get('one_hot')  # e.g. [32, 3]
    ref_label = data_dicts.get('ref_label')  # e.g. [32, 140]

    bs = point_cloud.shape[0]

    # Labels: when present they are used to compute the loss.
    box3d_center_label = data_dicts.get('box3d_center')  # e.g. [32, 3]
    size_class_label = data_dicts.get('size_class')  # e.g. [32]
    box3d_size_label = data_dicts.get('box3d_size')  # full size, not residual
    box3d_heading_label = data_dicts.get('box3d_heading')  # not residual

    center_ref1 = data_dicts.get('center_ref1')  # e.g. [32, 3, 280]
    center_ref2 = data_dicts.get('center_ref2')  # e.g. [32, 3, 140]
    center_ref3 = data_dicts.get('center_ref3')  # e.g. [32, 3, 70]
    center_ref4 = data_dicts.get('center_ref4')  # e.g. [32, 3, 35]

    # Split the point cloud into xyz and the extra per-point channels.
    object_point_cloud_xyz = point_cloud[:, :3, :].contiguous()
    if point_cloud.shape[1] == 4:
        object_point_cloud_i = point_cloud[:, [3], :].contiguous()
    elif point_cloud.shape[1] == 6:
        object_point_cloud_i = point_cloud[:, 3:6, :].contiguous()
    else:
        object_point_cloud_i = None

    mean_size_array = torch.from_numpy(g_mean_size_arr).type_as(point_cloud)

    feat1, feat2, feat3, feat4 = self.feat_net(
        object_point_cloud_xyz,
        [center_ref1, center_ref2, center_ref3, center_ref4],
        object_point_cloud_i,
        one_hot)

    x = self.conv_net(feat1, feat2, feat3, feat4)  # e.g. [32, 768, 140]

    cls_scores = self.cls_out(x)  # e.g. [32, 2, 140]
    outputs = self.reg_out(x)  # e.g. [32, 39, 140]

    num_out = outputs.shape[2]
    output_size = outputs.shape[1]

    # b, c, n -> (b * n), c
    cls_scores = cls_scores.permute(0, 2, 1).contiguous().view(-1, 2)
    outputs = outputs.permute(0, 2, 1).contiguous().view(-1, output_size)
    center_ref2 = center_ref2.permute(0, 2, 1).contiguous().view(-1, 3)

    cls_probs = F.softmax(cls_scores, -1)

    if box3d_center_label is None:
        # No label: test mode or input from RGB detection -> return output.
        det_outputs = self._slice_output(outputs)
        (center_boxnet, heading_scores, heading_res_norm,
         size_scores, size_res_norm) = det_outputs

        heading_probs = F.softmax(heading_scores, -1)
        size_probs = F.softmax(size_scores, -1)

        heading_pred_label = torch.argmax(heading_probs, -1)
        size_pred_label = torch.argmax(size_probs, -1)

        center_preds = center_boxnet + center_ref2
        heading_preds = angle_decode(heading_res_norm, heading_pred_label)
        size_preds = size_decode(size_res_norm, mean_size_array,
                                 size_pred_label)

        cls_probs = cls_probs.view(bs, -1, 2)
        center_preds = center_preds.view(bs, -1, 3)
        size_preds = size_preds.view(bs, -1, 3)
        heading_preds = heading_preds.view(bs, -1)

        outputs = (cls_probs, center_preds, heading_preds, size_preds)
        return outputs

    # Box regression is supervised only on entries whose ref_label is 1.
    fg_idx = (ref_label.view(-1) == 1).nonzero().view(-1)
    assert fg_idx.numel() != 0, 'ref_label contains no entry equal to 1'

    outputs = outputs[fg_idx, :]
    center_ref2 = center_ref2[fg_idx]

    det_outputs = self._slice_output(outputs)
    (center_boxnet, heading_scores, heading_res_norm,
     size_scores, size_res_norm) = det_outputs

    # Computed once; the original evaluated these three statements twice in
    # a row with identical inputs.
    heading_probs = F.softmax(heading_scores, -1)
    size_probs = F.softmax(size_scores, -1)
    cls_loss = softmax_focal_loss_ignore(cls_probs, ref_label.view(-1),
                                         ignore_idx=-1)

    # Prepare labels: repeat each per-sample label for all num_out outputs,
    # flatten, and keep the selected entries.
    center_label = box3d_center_label.unsqueeze(1).expand(-1, num_out, -1)\
        .contiguous().view(-1, 3)[fg_idx]
    size_label = box3d_size_label.unsqueeze(1).expand(-1, num_out, -1)\
        .contiguous().view(-1, 3)[fg_idx]
    heading_label = box3d_heading_label.view(-1, 1).expand(-1, num_out)\
        .contiguous().view(-1)[fg_idx]
    size_class_label = size_class_label.view(-1, 1).expand(-1, num_out)\
        .contiguous().view(-1)[fg_idx]

    # Encode regression targets.
    center_gt_offsets = center_encode(center_label, center_ref2)
    heading_class_label, heading_res_norm_label = angle_encode(heading_label)
    size_res_label_norm = size_encode(size_label, mean_size_array,
                                      size_class_label)

    # Loss calculation.
    center_loss = self.get_center_loss(center_boxnet, center_gt_offsets)

    heading_class_loss, heading_res_norm_loss = self.get_heading_loss(
        heading_scores, heading_res_norm,
        heading_class_label, heading_res_norm_label)

    size_class_loss, size_res_norm_loss = self.get_size_loss(
        size_scores, size_res_norm, size_class_label, size_res_label_norm)

    # Corner loss regularization: decode with the ground-truth class labels.
    center_preds = center_decode(center_ref2, center_boxnet)
    heading = angle_decode(heading_res_norm, heading_class_label)
    size = size_decode(size_res_norm, mean_size_array, size_class_label)

    corners_loss, corner_gts = self.get_corner_loss(
        (center_preds, heading, size),
        (center_label, heading_label, size_label))

    BOX_LOSS_WEIGHT = cfg.LOSS.BOX_LOSS_WEIGHT
    CORNER_LOSS_WEIGHT = cfg.LOSS.CORNER_LOSS_WEIGHT
    HEAD_REG_WEIGHT = cfg.LOSS.HEAD_REG_WEIGHT
    SIZE_REG_WEIGHT = cfg.LOSS.SIZE_REG_WEIGHT

    # Weighted sum of all losses.
    loss = cls_loss + \
        BOX_LOSS_WEIGHT * (center_loss +
                           heading_class_loss + size_class_loss +
                           HEAD_REG_WEIGHT * heading_res_norm_loss +
                           SIZE_REG_WEIGHT * size_res_norm_loss +
                           CORNER_LOSS_WEIGHT * corners_loss)

    # Some metrics to monitor training status.
    with torch.no_grad():
        # Accuracy.
        cls_prec = get_accuracy(cls_probs, ref_label.view(-1))
        heading_prec = get_accuracy(heading_probs,
                                    heading_class_label.view(-1))
        size_prec = get_accuracy(size_probs, size_class_label.view(-1))

        # IoU metrics: decode with the predicted (argmax) class labels.
        heading_pred_label = torch.argmax(heading_probs, -1)
        size_pred_label = torch.argmax(size_probs, -1)

        heading_preds = angle_decode(heading_res_norm, heading_pred_label)
        size_preds = size_decode(size_res_norm, mean_size_array,
                                 size_pred_label)

        corner_preds = get_box3d_corners_helper(center_preds, heading_preds,
                                                size_preds)
        overlap = rbbox_iou_3d_pair(corner_preds.detach().cpu().numpy(),
                                    corner_gts.detach().cpu().numpy())

        iou2ds, iou3ds = overlap[:, 0], overlap[:, 1]
        iou2d_mean = iou2ds.mean()
        iou3d_mean = iou3ds.mean()
        # Fraction of boxes whose 3D IoU reaches the configured threshold.
        iou3d_gt_mean = (iou3ds >= cfg.IOU_THRESH).mean()

        iou2d_mean = torch.tensor(iou2d_mean).type_as(cls_prec)
        iou3d_mean = torch.tensor(iou3d_mean).type_as(cls_prec)
        iou3d_gt_mean = torch.tensor(iou3d_gt_mean).type_as(cls_prec)

    losses = {
        'total_loss': loss,
        'cls_loss': cls_loss,
        'center_loss': center_loss,
        'heading_class_loss': heading_class_loss,
        'heading_residual_normalized_loss': heading_res_norm_loss,
        'size_class_loss': size_class_loss,
        'size_residual_normalized_loss': size_res_norm_loss,
        'corners_loss': corners_loss
    }
    metrics = {
        'cls_acc': cls_prec,
        'head_acc': heading_prec,
        'size_acc': size_prec,
        'iou2d': iou2d_mean,
        'iou3d': iou3d_mean,
        'iou3d_' + str(cfg.IOU_THRESH): iou3d_gt_mean
    }

    return losses, metrics
def get_loss(center_label, angle_cls_label, angle_res_label, size_res_label,
             end_points):
    """Build the box-estimation loss and register it in the 'losses' collection.

    Args:
        center_label: ground-truth box centers.
        angle_cls_label: ground-truth angle bin indices.
        angle_res_label: ground-truth angle residuals w.r.t. the bin center.
        size_res_label: ground-truth size residuals w.r.t. g_mean_size_arr.
        end_points: dict of network outputs; keys read here are 'center',
            'center_delta', 'angle_scores', 'angle_res_normalized',
            'size_res_normalized', 'angle_res' and 'size_res'.

    Returns:
        A list ``[total_loss, center_loss, stage1_center_loss,
        angle_cls_loss, angle_res_normalized_loss, size_res_normalized_loss,
        corners_loss]``. A scalar summary is also written for each partial
        loss.
    """

    def _summed_axis_huber(target, estimate):
        # Huber loss evaluated separately on components 0, 1 and 2 of the
        # last axis, then added up.
        # NOTE(review): after selecting one component with [..., k],
        # tf.norm(axis=-1) reduces over the last *remaining* axis - confirm
        # this is the intended reduction.
        per_axis = []
        for axis in range(3):
            dist = tf.norm(target[..., axis] - estimate[..., axis], axis=-1)
            per_axis.append(huber_loss(dist, delta=1.0))
        return per_axis[0] + per_axis[1] + per_axis[2]

    # Center regression losses.
    center_loss = _summed_axis_huber(center_label, end_points['center'])
    tf.summary.scalar('center_loss', center_loss)

    stage1_center_loss = _summed_axis_huber(center_label,
                                            end_points['center_delta'])
    tf.summary.scalar('stage1 center loss', stage1_center_loss)

    # Heading angle loss: bin classification ...
    angle_cls_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['angle_scores'], labels=angle_cls_label))
    tf.summary.scalar('angle_class_loss', angle_cls_loss)

    # ... plus regression of the residual of the ground-truth bin only.
    hcls_onehot = tf.one_hot(angle_cls_label,
                             depth=NUM_ANGLE_BIN,
                             on_value=1, off_value=0, axis=-1)
    angle_per_class = 2 * np.pi / NUM_ANGLE_BIN
    angle_res_normalized_label = angle_res_label / (angle_per_class / 2)
    selected_angle_res = tf.reduce_sum(
        end_points['angle_res_normalized'] * tf.to_float(hcls_onehot),
        axis=1)
    angle_res_normalized_loss = huber_loss(
        selected_angle_res - angle_res_normalized_label, delta=1.0)
    tf.summary.scalar('angle_res_loss', angle_res_normalized_loss)

    # Size loss on residuals normalized by the mean sizes.
    mean_sizes = tf.expand_dims(
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0)
    size_res_label_normalized = size_res_label / mean_sizes
    size_res_normalized_loss = _summed_axis_huber(
        size_res_label_normalized, end_points['size_res_normalized'])
    tf.summary.scalar('size_res_loss', size_res_normalized_loss)

    # Corner loss: corners are computed for every angle bin, then the
    # ground-truth bin is picked with the one-hot mask.
    corners_3d = get_box3d_corners(end_points['center'],
                                   end_points['angle_res'],
                                   end_points['size_res'])
    onehot_mask = tf.to_float(
        tf.expand_dims(tf.expand_dims(hcls_onehot, -1), -1))
    corners_3d_pred = tf.reduce_sum(onehot_mask * corners_3d, axis=[1])

    # Absolute ground-truth heading = residual + center of the labelled bin.
    angle_bin_centers = tf.constant(
        np.arange(0, 2 * np.pi, 2 * np.pi / NUM_ANGLE_BIN), dtype=tf.float32)
    heading_per_bin = tf.expand_dims(angle_res_label, 1) + \
        tf.expand_dims(angle_bin_centers, 0)
    heading_label = tf.reduce_sum(
        tf.to_float(hcls_onehot) * heading_per_bin, 1)
    size_label = mean_sizes + size_res_label

    corners_3d_gt = get_box3d_corners_helper(
        center_label, heading_label, size_label)
    corners_3d_gt_flip = get_box3d_corners_helper(
        center_label, heading_label + np.pi, size_label)
    # A box whose heading is off by pi is compared against the flipped
    # ground truth; the closer of the two is used.
    corners_dist = tf.minimum(
        tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0)
    tf.summary.scalar('corners_loss', corners_loss)

    total_loss = center_loss + \
        stage1_center_loss + \
        angle_cls_loss + \
        20.0 * angle_res_normalized_loss + \
        20.0 * size_res_normalized_loss + \
        5.0 * corners_loss
    tf.add_to_collection('losses', total_loss)

    return [total_loss, center_loss, stage1_center_loss, angle_cls_loss,
            angle_res_normalized_loss, size_res_normalized_loss, corners_loss]
def get_strong_loss(pred, labels, end_points, prefix='', reg_weight=0.001,
                    reduce_loss=True, c=None):
    """Build the per-sample segmentation and box losses of the strong branch.

    Args:
        pred: pair ``(pred_seg, pred_box)``; ``pred_box`` must unpack into
            five elements. Only ``pred_seg`` is used directly - the box
            predictions are read from ``end_points``.
        labels: 6-tuple ``(y_seg, y_center, y_orient_cls, y_orient_reg,
            y_dims_cls, y_dims_reg)``.
        end_points: dict of network outputs. Keys read with ``prefix``
            prepended: 'center', 'heading_scores',
            'heading_residuals_normalized', 'size_scores',
            'size_residuals_normalized', 'heading_residuals' and
            'size_residuals'. 'stage1_center' is read without the prefix.
        prefix: string prepended to the end_points keys listed above.
        reg_weight: accepted for interface compatibility; not used.
        reduce_loss: accepted for interface compatibility; not used - the
            per-sample (unreduced) losses are always returned.
        c: config object providing the ``STRONG_*`` loss weights. Required
            despite the ``None`` default.

    Returns:
        A pair ``(mask_losses, total_losses)``: the weighted segmentation
        loss and the weighted box loss, both before the final mean over the
        batch. Scalar summaries of the batch means are written as a side
        effect.

    Raises:
        ValueError: if ``c`` is None.
    """
    # Fail fast with a clear message instead of an AttributeError on None
    # after the graph ops and summaries have already been created.
    if c is None:
        raise ValueError(
            'get_strong_loss requires a config object `c` providing the '
            'STRONG_* loss weights')

    pred_seg, pred_box = pred
    # Unpacked only to check the arity of pred_box; values come from
    # end_points below.
    (pred_center, pred_dims_cls, pred_dims_reg,
     pred_orient_cls, pred_orient_reg) = pred_box
    (y_seg, y_center, y_orient_cls, y_orient_reg,
     y_dims_cls, y_dims_reg) = labels

    # 1. Segmentation loss
    mask_losses = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_seg, labels=y_seg),
        axis=1)
    mask_loss = tf.reduce_mean(mask_losses)
    tf.summary.scalar('Strong_Loss/3d mask loss', mask_loss)

    # 2. Box Estimation loss
    # Center losses
    center_dist = tf.norm(y_center - end_points[prefix + 'center'], axis=-1)
    center_losses = huber_loss(center_dist, delta=2.0, reduce_loss=False)
    center_loss = tf.reduce_mean(center_losses)
    tf.summary.scalar('Strong_Loss/center loss', center_loss)

    # NOTE(review): 'stage1_center' is looked up without `prefix`, unlike
    # the other keys - confirm this is intentional.
    stage1_center_dist = tf.norm(y_center - end_points['stage1_center'],
                                 axis=-1)
    stage1_center_losses = huber_loss(stage1_center_dist, delta=1.0,
                                      reduce_loss=False)
    stage1_center_loss = tf.reduce_mean(stage1_center_losses)
    tf.summary.scalar('Strong_Loss/stage1 center loss', stage1_center_loss)

    # Heading losses
    heading_class_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=end_points[prefix + 'heading_scores'], labels=y_orient_cls)
    heading_class_loss = tf.reduce_mean(heading_class_losses)
    tf.summary.scalar('Strong_Loss/heading class loss', heading_class_loss)

    y_orient_cls_onehot = tf.one_hot(y_orient_cls,
                                     depth=NUM_HEADING_BIN,
                                     on_value=1, off_value=0, axis=-1)
    print(y_orient_cls_onehot.shape)
    heading_residual_normalized_label = y_orient_reg / \
        (np.pi / NUM_HEADING_BIN)
    # Only the residual of the ground-truth heading bin is supervised.
    heading_residual_normalized_losses = huber_loss(
        tf.reduce_sum(
            end_points[prefix + 'heading_residuals_normalized'] *
            tf.to_float(y_orient_cls_onehot), axis=1) -
        heading_residual_normalized_label,
        delta=1.0, reduce_loss=False)
    heading_residual_normalized_loss = tf.reduce_mean(
        heading_residual_normalized_losses)
    tf.summary.scalar('Strong_Loss/heading residual normalized loss',
                      heading_residual_normalized_loss)

    # Size losses
    size_class_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=end_points[prefix + 'size_scores'], labels=y_dims_cls)
    size_class_loss = tf.reduce_mean(size_class_losses)
    tf.summary.scalar('Strong_Loss/size class loss', size_class_loss)

    y_dims_cls_onehot = tf.one_hot(y_dims_cls,
                                   depth=NUM_SIZE_CLUSTER,
                                   on_value=1, off_value=0,
                                   axis=-1)  # (B,NUM_SIZE_CLUSTER)
    y_dims_cls_onehot_tiled = tf.tile(
        tf.expand_dims(tf.to_float(y_dims_cls_onehot), -1),
        [1, 1, 3])  # (B,NUM_SIZE_CLUSTER,3)
    predicted_size_residual_normalized = tf.reduce_sum(
        end_points[prefix + 'size_residuals_normalized'] *
        y_dims_cls_onehot_tiled, axis=[1])  # (B,3)

    tmp3 = tf.expand_dims(tf.constant(MEAN_DIMS_ARR, dtype=tf.float32),
                          0)  # (1,NUM_SIZE_CLUSTER,3)
    mean_size_label = tf.reduce_sum(y_dims_cls_onehot_tiled * tmp3,
                                    axis=[1])  # (B,3)
    size_residual_label_normalized = y_dims_reg / mean_size_label
    size_normalized_dist = tf.norm(
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_losses = huber_loss(size_normalized_dist,
                                                 delta=1.0,
                                                 reduce_loss=False)
    size_residual_normalized_loss = tf.reduce_mean(
        size_residual_normalized_losses)
    tf.summary.scalar('Strong_Loss/size residual normalized loss',
                      size_residual_normalized_loss)

    # Corner loss
    # Compute box corners for every (heading bin, size cluster) pair and
    # keep the ground-truth pair with a one-hot mask.
    corners_3d = get_box3d_corners_sunrgbd(
        end_points[prefix + 'center'],
        end_points[prefix + 'heading_residuals'],
        end_points[prefix + 'size_residuals'])  # (B,NH,NS,8,3)
    gt_mask = tf.tile(tf.expand_dims(y_orient_cls_onehot, 2),
                      [1, 1, NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(y_dims_cls_onehot, 1),
                [1, NUM_HEADING_BIN, 1])  # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum(
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask, -1), -1)) *
        corners_3d, axis=[1, 2])  # (B,8,3)

    heading_bin_centers = tf.constant(
        np.arange(0, 2 * np.pi, 2 * np.pi / NUM_HEADING_BIN),
        dtype=tf.float32)  # (NH,)
    heading_label = tf.expand_dims(y_orient_reg, 1) + \
        tf.expand_dims(heading_bin_centers, 0)  # (B,NH)
    heading_label = tf.reduce_sum(
        tf.to_float(y_orient_cls_onehot) * heading_label, 1)
    mean_sizes = tf.expand_dims(
        tf.constant(MEAN_DIMS_ARR, dtype=tf.float32), 0)  # (1,NS,3)
    size_label = mean_sizes + tf.expand_dims(
        y_dims_reg, 1)  # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum(
        tf.expand_dims(tf.to_float(y_dims_cls_onehot), -1) * size_label,
        axis=[1])  # (B,3)

    corners_3d_gt = get_box3d_corners_helper(
        y_center, heading_label, size_label)  # (B,8,3)
    corners_3d_gt_flip = get_box3d_corners_helper(
        y_center, heading_label + np.pi, size_label)  # (B,8,3)
    # Use the closer of the ground truth and its pi-flipped counterpart.
    corners_dist = tf.minimum(
        tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    print(str(corners_dist.shape) + ' (Corners dist)')
    corners_losses = tf.reduce_mean(
        huber_loss(corners_dist, delta=1.0, reduce_loss=False), axis=1)
    corners_loss = tf.reduce_mean(corners_losses)
    tf.summary.scalar('Strong_Loss/corners loss', corners_loss)

    # Weighted combination of the per-sample losses. The corner term sits
    # outside the STRONG_BOX_MULTIPLER factor.
    mask_losses = c.STRONG_WEIGHT_CROSS_ENTROPY * mask_losses
    total_losses = c.STRONG_BOX_MULTIPLER * \
        (c.STRONG_WEIGHT_CENTER * center_losses +
         c.STRONG_WEIGHT_ORIENT_CLS * heading_class_losses +
         c.STRONG_WEIGHT_DIMS_CLS * size_class_losses +
         c.STRONG_WEIGHT_ORIENT_REG * heading_residual_normalized_losses +
         c.STRONG_WEIGHT_DIMS_REG * size_residual_normalized_losses +
         c.STRONG_WEIGHT_TNET_CENTER * stage1_center_losses) + \
        c.STRONG_WEIGHT_CORNER * corners_losses

    return mask_losses, total_losses
center_x_residuals_normalized = np.tile(center_res[:1], NUM_CENTER_BIN) center_z_scores = get_one_hot(center_cls[1:],NUM_CENTER_BIN) center_z_residuals_normalized = np.tile(center_res[2:], NUM_CENTER_BIN) heading_scores = get_one_hot([angle_cls], NUM_HEADING_BIN) heading_residuals_normalized = np.tile([angle_res], NUM_HEADING_BIN) size_scores = get_one_hot([size_cls], NUM_SIZE_CLUSTER) size_residuals_normalized = np.tile([size_res], (NUM_SIZE_CLUSTER,1)) fg_points_xyz = tf.constant(np.array([[point]]), dtype=tf.float32) centers, angles, sizes = box_encoder.tf_decode({ # 'fg_points_xyz': tf.constant(np.array([[point]]), dtype=tf.float32), 'center_x_scores': tf.constant(np.array([[center_x_scores]]), dtype=tf.float32), 'center_x_residuals_normalized': tf.constant(np.array([[center_x_residuals_normalized]]), dtype=tf.float32), 'center_z_scores': tf.constant(np.array([[center_z_scores]]), dtype=tf.float32), 'center_z_residuals_normalized': tf.constant(np.array([[center_z_residuals_normalized]]), dtype=tf.float32), 'center_y_residuals': tf.constant(np.array([[center_res[1:2]]]), dtype=tf.float32), 'heading_scores': tf.constant(np.array([[heading_scores]]), dtype=tf.float32), 'heading_residuals_normalized': tf.constant(np.array([[heading_residuals_normalized]]), dtype=tf.float32), 'size_scores': tf.constant(np.array([[size_scores]]), dtype=tf.float32), 'size_residuals_normalized': tf.constant(np.array([[size_residuals_normalized]]), dtype=tf.float32) }) centers = centers + fg_points_xyz N = 1 * 1 # batch * num_point corners_3d = get_box3d_corners_helper(tf.reshape(centers, [N,3]), tf.reshape(angles, [N]), tf.reshape(sizes, [N,3])) with tf.Session() as sess: c, a, s = sess.run([centers, angles, sizes]) print(obj.t, '<->', c[0][0]) print(obj.ry, '<->', a[0][0]) print([obj.l, obj.h, obj.w], '<->', s[0][0]) corners_list = sess.run(corners_3d) print(corners_list)