def forward(self, rpn_scores, rpn_reg, xyz):
    """
    Note: all inputs are in batch form.
    :param rpn_scores: (B, N) raw (pre-sigmoid) classification output;
        B: number of batches, N: number of points
    :param rpn_reg: (B, N, 9) nine box-regression features per point.
        (In the original code the last dimension is not a fixed 8; with all
        the bin classifications it is much larger.)
    :param xyz: (B, N, 3)
    :return bbox3d: (B, M, 9)
    """
    batch_size = xyz.shape[0]
    proposals = decode_bbox_target(xyz.view(-1, 3), rpn_reg.view(-1, rpn_reg.shape[-1]),
                                   anchor_size=self.MEAN_SIZE,
                                   loc_scope=cfg.RPN.LOC_SCOPE,
                                   loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                   num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                   get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                   get_y_by_bin=False,
                                   get_ry_fine=False)  # (N, 9)
    proposals = proposals.view(batch_size, -1, rpn_reg.shape[-1])

    # For the raw RPN classification output, the more positive a point's score,
    # the more likely that point lies inside a box, so sorting by this value
    # ranks the points most likely to be in a box first.
    scores = rpn_scores
    _, sorted_idxs = torch.sort(scores, dim=1, descending=True)

    batch_size = scores.size(0)
    # Tensor.new is new_empty in newer PyTorch versions: it returns a tensor of
    # the given size filled with uninitialized data, with the same torch.dtype
    # and torch.device as the source tensor by default.
    ret_bbox3d = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N,
                            rpn_reg.shape[-1]).zero_()  # self.mode == 'TEST', RPN_POST_NMS_TOP_N == 100
    ret_scores = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N).zero_()
    for k in range(batch_size):
        # scores of all the points in one scene
        scores_single = scores[k]
        # proposed bboxes of one scene (each point proposes a bbox)
        proposals_single = proposals[k]
        # indexes of the points in this scene sorted by score (highest first)
        order_single = sorted_idxs[k]
        '''if cfg.TEST.RPN_DISTANCE_BASED_PROPOSE:  # this is true
            scores_single, proposals_single = self.distance_based_proposal(scores_single, proposals_single, order_single)
        else:'''
        scores_single, proposals_single = self.score_based_proposal(
            scores_single, proposals_single, order_single)

        proposals_tot = proposals_single.size(0)
        ret_bbox3d[k, :proposals_tot] = proposals_single
        ret_scores[k, :proposals_tot] = scores_single

    return ret_bbox3d, ret_scores
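# The score-based selection above just ranks points by their raw (pre-sigmoid)
# RPN score and keeps the best K proposals. A minimal self-contained sketch of
# that idea (score_based_topk is a hypothetical stand-in, not the repository's
# score_based_proposal):
import torch

def score_based_topk(scores, proposals, k=100):
    # keep the k proposals whose raw scores are highest
    order = torch.argsort(scores, descending=True)[:k]
    return scores[order], proposals[order]

# usage: 6 points, each proposing a (x, y, z, h, w, l, ry) box
scores = torch.tensor([0.3, -1.2, 2.5, 0.0, 1.1, -0.4])
proposals = torch.randn(6, 7)
top_scores, top_boxes = score_based_topk(scores, proposals, k=3)
print(top_scores)  # tensor([2.5000, 1.1000, 0.3000])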
def forward(self, rpn_scores, rpn_reg, xyz):
    """
    :param rpn_scores: (B, N)
    :param rpn_reg: (B, N, 8)
    :param xyz: (B, N, 3)
    :return bbox3d: (B, M, 7)
    """
    # pdb.set_trace()
    batch_size = xyz.shape[0]
    proposals = decode_bbox_target(xyz.view(-1, 3), rpn_reg.view(-1, rpn_reg.shape[-1]),
                                   anchor_size=self.MEAN_SIZE,
                                   loc_scope=cfg.RPN.LOC_SCOPE,
                                   loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                   num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                   get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                   get_y_by_bin=False,
                                   get_ry_fine=False)  # (N, 7)
    proposals[:, 1] = proposals[:, 1] + proposals[:, 3] / 2  # set y as the center of bottom
    proposals = proposals.view(batch_size, -1, 7)

    scores = rpn_scores
    _, sorted_idxs = torch.sort(scores, dim=1, descending=True)

    batch_size = scores.size(0)
    ret_bbox3d = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N, 7).zero_()
    ret_scores = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N).zero_()
    for k in range(batch_size):
        scores_single = scores[k]
        proposals_single = proposals[k]
        order_single = sorted_idxs[k]

        if cfg.TEST.RPN_DISTANCE_BASED_PROPOSE:
            scores_single, proposals_single = self.distance_based_proposal(
                scores_single, proposals_single, order_single)
        else:
            scores_single, proposals_single = self.score_based_proposal(
                scores_single, proposals_single, order_single)

        proposals_tot = proposals_single.size(0)
        ret_bbox3d[k, :proposals_tot] = proposals_single
        ret_scores[k, :proposals_tot] = scores_single

    return ret_bbox3d, ret_scores
def forward(self, rpn_reg, xyz, gt_center=None):
    """
    :param rpn_reg: (B, N, 8)
    :param xyz: (B, N, 3)
    :return: (B, N, 3) predicted box centers
    """
    batch_size = xyz.shape[0]
    proposals = decode_bbox_target(xyz.view(-1, 3), rpn_reg.view(-1, rpn_reg.shape[-1]),
                                   anchor_size=self.MEAN_SIZE,
                                   loc_scope=cfg.RPN.LOC_SCOPE,
                                   loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
                                   num_head_bin=cfg.RPN.NUM_HEAD_BIN,
                                   get_xz_fine=cfg.RPN.LOC_XZ_FINE,
                                   get_y_by_bin=False,
                                   get_ry_fine=False)  # (N, 7)
    # proposals[:, 1] += proposals[:, 3] / 2  # set y as the center of bottom
    proposals = proposals.view(batch_size, -1, 7)  # (B, N, 7); decode returns 7 values per box
    return proposals[..., :3]  # keep only the (x, y, z) centers
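# decode_bbox_target recovers each coordinate from a classified bin plus a
# regressed residual (PointRCNN's bin-based localization). A minimal sketch of
# the idea for a single axis; decode_axis and its defaults are illustrative
# assumptions mirroring cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE above:
import torch

def decode_axis(bin_logits, residuals, anchor, loc_scope=3.0, loc_bin_size=0.5):
    # pick the most likely bin, take its center in [-loc_scope, loc_scope],
    # then add the residual regressed for that bin (scaled by half a bin)
    bin_idx = torch.argmax(bin_logits, dim=-1)  # (N,)
    bin_center = bin_idx.float() * loc_bin_size - loc_scope + loc_bin_size / 2
    res = torch.gather(residuals, -1, bin_idx.unsqueeze(-1)).squeeze(-1)
    return anchor + bin_center + res * (loc_bin_size / 2)

num_bins = int(2 * 3.0 / 0.5)         # 12 bins covering [-3m, 3m]
logits = torch.randn(4, num_bins)     # per-point bin scores
residuals = torch.randn(4, num_bins)  # per-bin normalized residuals
x = decode_axis(logits, residuals, anchor=torch.zeros(4))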
def eval_one_epoch_joint(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(666)
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    mode = 'TEST' if args.test else 'EVAL'

    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')
    for data in dataloader:
        cnt += 1
        sample_id, pts_rect, pts_features, pts_input = \
            data['sample_id'], data['pts_rect'], data['pts_features'], data['pts_input']
        batch_size = len(sample_id)
        inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
        input_data = {'pts_input': inputs}

        # model inference
        ret_dict = model(input_data)

        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
        roi_boxes3d = ret_dict['rois']  # (B, M, 7)
        seg_result = ret_dict['seg_result'].long()  # (B, N)

        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1, ret_dict['rcnn_cls'].shape[1])
        rcnn_reg = ret_dict['rcnn_reg'].view(batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            assert False

        pred_boxes3d = decode_bbox_target(
            roi_boxes3d.view(-1, 7), rcnn_reg.view(-1, rcnn_reg.shape[-1]),
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True).view(batch_size, -1, 7)

        # scoring
        if rcnn_cls.shape[2] == 1:
            raw_scores = rcnn_cls  # (B, M, 1)
            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        recalled_num = gt_num = rpn_iou = 0
        if not args.test:
            if not cfg.RPN.FIXED:
                rpn_cls_label, rpn_reg_label = data['rpn_cls_label'], data['rpn_reg_label']
                rpn_cls_label = torch.from_numpy(rpn_cls_label).cuda(non_blocking=True).long()

            gt_boxes3d = data['gt_boxes3d']

            for k in range(batch_size):
                # calculate recall
                cur_gt_boxes3d = gt_boxes3d[k]
                # drop the zero-padded boxes at the end
                tmp_idx = len(cur_gt_boxes3d) - 1
                while tmp_idx >= 0 and cur_gt_boxes3d[tmp_idx].sum() == 0:
                    tmp_idx -= 1

                if tmp_idx >= 0:
                    cur_gt_boxes3d = cur_gt_boxes3d[:tmp_idx + 1]
                    cur_gt_boxes3d = torch.from_numpy(cur_gt_boxes3d).cuda(non_blocking=True).float()
                    iou3d = iou3d_utils.boxes_iou3d_gpu(pred_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou, _ = iou3d.max(dim=0)
                    refined_iou, _ = iou3d.max(dim=1)

                    for idx, thresh in enumerate(thresh_list):
                        total_recalled_bbox_list[idx] += (gt_max_iou > thresh).sum().item()
                    recalled_num += (gt_max_iou > 0.7).sum().item()
                    gt_num += cur_gt_boxes3d.shape[0]
                    total_gt_bbox += cur_gt_boxes3d.shape[0]

                    # original recall
                    iou3d_in = iou3d_utils.boxes_iou3d_gpu(roi_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou_in, _ = iou3d_in.max(dim=0)

                    for idx, thresh in enumerate(thresh_list):
                        total_roi_recalled_bbox_list[idx] += (gt_max_iou_in > thresh).sum().item()

            if not cfg.RPN.FIXED:
                fg_mask = rpn_cls_label > 0
                correct = ((seg_result == rpn_cls_label) & fg_mask).sum().float()
                union = fg_mask.sum().float() + (seg_result > 0).sum().float() - correct
                rpn_iou = correct / torch.clamp(union, min=1.0)
                total_rpn_iou += rpn_iou.item()

        disp_dict = {'mode': mode, 'recall': '%d/%d' % (total_recalled_bbox_list[3], total_gt_bbox)}
        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            roi_scores_raw_np = roi_scores_raw.cpu().numpy()
            raw_scores_np = raw_scores.cpu().numpy()

            rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
            rpn_xyz_np = ret_dict['backbone_xyz'].cpu().numpy()
            seg_result_np = seg_result.cpu().numpy()
            output_data = np.concatenate(
                (rpn_xyz_np, rpn_cls_np.reshape(batch_size, -1, 1),
                 seg_result_np.reshape(batch_size, -1, 1)), axis=2)

            for k in range(batch_size):
                cur_sample_id = sample_id[k]
                calib = dataset.get_calib(cur_sample_id)
                image_shape = dataset.get_image_shape(cur_sample_id)
                save_kitti_format(cur_sample_id, calib, roi_boxes3d_np[k], roi_output_dir,
                                  roi_scores_raw_np[k], image_shape)
                save_kitti_format(cur_sample_id, calib, pred_boxes3d_np[k], refine_output_dir,
                                  raw_scores_np[k], image_shape)
                output_file = os.path.join(rpn_output_dir, '%06d.npy' % cur_sample_id)
                np.save(output_file, output_data.astype(np.float32))

        # scores thresh
        inds = norm_scores > cfg.RCNN.SCORE_THRESH

        for k in range(batch_size):
            cur_inds = inds[k].view(-1)
            if cur_inds.sum() == 0:
                continue

            pred_boxes3d_selected = pred_boxes3d[k, cur_inds]
            raw_scores_selected = raw_scores[k, cur_inds]
            norm_scores_selected = norm_scores[k, cur_inds]

            # rotated NMS
            boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_selected)
            keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected,
                                           cfg.RCNN.NMS_THRESH).view(-1)
            pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
            scores_selected = raw_scores_selected[keep_idx]
            pred_boxes3d_selected, scores_selected = \
                pred_boxes3d_selected.cpu().numpy(), scores_selected.cpu().numpy()

            cur_sample_id = sample_id[k]
            calib = dataset.get_calib(cur_sample_id)
            final_total += pred_boxes3d_selected.shape[0]
            image_shape = dataset.get_image_shape(cur_sample_id)
            save_kitti_format(cur_sample_id, calib, pred_boxes3d_selected, final_output_dir,
                              scores_selected, image_shape)

    progress_bar.close()

    # dump empty files
    split_file = os.path.join(dataset.imageset_dir, '..', '..', 'ImageSets', dataset.split + '.txt')
    split_file = os.path.abspath(split_file)
    image_idx_list = [x.strip() for x in open(split_file).readlines()]
    empty_cnt = 0
    for k in range(len(image_idx_list)):
        cur_file = os.path.join(final_output_dir, '%s.txt' % image_idx_list[k])
        if not os.path.exists(cur_file):
            with open(cur_file, 'w') as temp_f:
                pass
            empty_cnt += 1
            logger.info('empty_cnt=%d: dump empty file %s' % (empty_cnt, cur_file))

    ret_dict = {'empty_cnt': empty_cnt}

    logger.info('-------------------performance of epoch %s---------------------' % epoch_id)
    logger.info(str(datetime.now()))

    avg_rpn_iou = total_rpn_iou / max(cnt, 1.0)
    avg_cls_acc = total_cls_acc / max(cnt, 1.0)
    avg_cls_acc_refined = total_cls_acc_refined / max(cnt, 1.0)
    avg_det_num = final_total / max(len(dataset), 1.0)
    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average rpn_iou refined: %.3f' % avg_rpn_iou)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rpn_iou'] = avg_rpn_iou
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox, cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    if cfg.TEST.SPLIT != 'test':
        logger.info('Average Precision:')
        name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}
        ap_result_str, ap_dict = kitti_evaluate(
            dataset.label_dir, final_output_dir, label_split_file=split_file,
            current_class=name_to_class[cfg.CLASSES])
        logger.info(ap_result_str)
        ret_dict.update(ap_dict)

    logger.info('result is saved to: %s' % result_dir)
    return ret_dict
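# The recall bookkeeping in eval_one_epoch_joint reduces to a column-wise max
# over the prediction-vs-gt IoU matrix followed by thresholding. A small
# self-contained check with a made-up IoU matrix standing in for
# iou3d_utils.boxes_iou3d_gpu:
import torch

thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
total_recalled = [0] * len(thresh_list)
iou3d = torch.tensor([[0.82, 0.10, 0.00],   # 4 predictions (rows)
                      [0.15, 0.55, 0.05],   # x 3 gt boxes (columns)
                      [0.05, 0.60, 0.12],
                      [0.00, 0.08, 0.35]])
gt_max_iou, _ = iou3d.max(dim=0)  # best prediction per gt: [0.82, 0.60, 0.35]
for idx, thresh in enumerate(thresh_list):
    total_recalled[idx] += (gt_max_iou > thresh).sum().item()
print(total_recalled)  # [3, 3, 2, 1, 0]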
def eval_one_epoch_rcnn(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(1024)
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    mode = 'TEST' if args.test else 'EVAL'

    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s RCNN EVALUATION ----' % epoch_id)
    model.eval()

    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    cnt = final_total = total_cls_acc = total_cls_acc_refined = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')
    for data in dataloader:
        sample_id = data['sample_id']
        cnt += 1
        assert args.batch_size == 1, 'Only support bs=1 here'
        input_data = {}
        for key, val in data.items():
            if key != 'sample_id':
                input_data[key] = torch.from_numpy(val).contiguous().cuda(non_blocking=True).float()

        roi_boxes3d = input_data['roi_boxes3d']
        roi_scores = input_data['roi_scores']
        if cfg.RCNN.ROI_SAMPLE_JIT:
            for key, val in input_data.items():
                if key in ['gt_iou', 'gt_boxes3d']:
                    continue
                input_data[key] = input_data[key].unsqueeze(dim=0)
        else:
            pts_input = torch.cat((input_data['pts_input'], input_data['pts_features']), dim=-1)
            input_data['pts_input'] = pts_input

        ret_dict = model(input_data)
        rcnn_cls = ret_dict['rcnn_cls']
        rcnn_reg = ret_dict['rcnn_reg']

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            roi_size = input_data['roi_size']
            anchor_size = roi_size

        pred_boxes3d = decode_bbox_target(
            roi_boxes3d, rcnn_reg,
            anchor_size=anchor_size,
            loc_scope=cfg.RCNN.LOC_SCOPE,
            loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
            num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
            get_xz_fine=True,
            get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
            loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
            loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
            get_ry_fine=True)

        # scoring
        if rcnn_cls.shape[1] == 1:
            raw_scores = rcnn_cls.view(-1)
            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        disp_dict = {'mode': mode}
        if not args.test:
            gt_boxes3d = input_data['gt_boxes3d']
            gt_iou = input_data['gt_iou']

            # calculate recall
            gt_num = gt_boxes3d.shape[0]
            if gt_num > 0:
                iou3d = iou3d_utils.boxes_iou3d_gpu(pred_boxes3d, gt_boxes3d)
                gt_max_iou, _ = iou3d.max(dim=0)
                refined_iou, _ = iou3d.max(dim=1)

                for idx, thresh in enumerate(thresh_list):
                    total_recalled_bbox_list[idx] += (gt_max_iou > thresh).sum().item()
                recalled_num = (gt_max_iou > 0.7).sum().item()
                total_gt_bbox += gt_num

                iou3d_in = iou3d_utils.boxes_iou3d_gpu(roi_boxes3d, gt_boxes3d)
                gt_max_iou_in, _ = iou3d_in.max(dim=0)
                for idx, thresh in enumerate(thresh_list):
                    total_roi_recalled_bbox_list[idx] += (gt_max_iou_in > thresh).sum().item()

            # classification accuracy
            cls_label = (gt_iou > cfg.RCNN.CLS_FG_THRESH).float()
            cls_valid_mask = ((gt_iou >= cfg.RCNN.CLS_FG_THRESH) |
                              (gt_iou <= cfg.RCNN.CLS_BG_THRESH)).float()
            cls_acc = ((pred_classes == cls_label.long()).float() * cls_valid_mask).sum() \
                / max(cls_valid_mask.sum(), 1.0)

            iou_thresh = 0.7 if cfg.CLASSES == 'Car' else 0.5
            cls_label_refined = (gt_iou >= iou_thresh).float()
            cls_acc_refined = (pred_classes == cls_label_refined.long()).float().sum() \
                / max(cls_label_refined.shape[0], 1.0)

            total_cls_acc += cls_acc.item()
            total_cls_acc_refined += cls_acc_refined.item()

            disp_dict['recall'] = '%d/%d' % (total_recalled_bbox_list[3], total_gt_bbox)
            disp_dict['cls_acc_refined'] = '%.2f' % cls_acc_refined.item()

        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        image_shape = dataset.get_image_shape(sample_id)
        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            calib = dataset.get_calib(sample_id)

            save_kitti_format(sample_id, calib, roi_boxes3d_np, roi_output_dir, roi_scores, image_shape)
            save_kitti_format(sample_id, calib, pred_boxes3d_np, refine_output_dir,
                              raw_scores.cpu().numpy(), image_shape)

        # NMS and scoring
        # scores thresh
        inds = norm_scores > cfg.RCNN.SCORE_THRESH
        if inds.sum() == 0:
            continue

        pred_boxes3d_selected = pred_boxes3d[inds]
        raw_scores_selected = raw_scores[inds]

        # NMS thresh
        boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_selected)
        keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected, cfg.RCNN.NMS_THRESH)
        pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
        scores_selected = raw_scores_selected[keep_idx]

        pred_boxes3d_selected, scores_selected = \
            pred_boxes3d_selected.cpu().numpy(), scores_selected.cpu().numpy()

        calib = dataset.get_calib(sample_id)
        final_total += pred_boxes3d_selected.shape[0]
        save_kitti_format(sample_id, calib, pred_boxes3d_selected, final_output_dir,
                          scores_selected, image_shape)

    progress_bar.close()

    # dump empty files
    split_file = os.path.join(dataset.imageset_dir, '..', '..', 'ImageSets', dataset.split + '.txt')
    split_file = os.path.abspath(split_file)
    image_idx_list = [x.strip() for x in open(split_file).readlines()]
    empty_cnt = 0
    for k in range(len(image_idx_list)):
        cur_file = os.path.join(final_output_dir, '%s.txt' % image_idx_list[k])
        if not os.path.exists(cur_file):
            with open(cur_file, 'w') as temp_f:
                pass
            empty_cnt += 1
            logger.info('empty_cnt=%d: dump empty file %s' % (empty_cnt, cur_file))

    ret_dict = {'empty_cnt': empty_cnt}

    logger.info('-------------------performance of epoch %s---------------------' % epoch_id)
    logger.info(str(datetime.now()))

    avg_cls_acc = total_cls_acc / max(cnt, 1.0)
    avg_cls_acc_refined = total_cls_acc_refined / max(cnt, 1.0)
    avg_det_num = final_total / max(cnt, 1.0)

    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox, cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    if cfg.TEST.SPLIT != 'test':
        logger.info('Average Precision:')
        name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}
        ap_result_str, ap_dict = kitti_evaluate(
            dataset.label_dir, final_output_dir, label_split_file=split_file,
            current_class=name_to_class[cfg.CLASSES])
        logger.info(ap_result_str)
        ret_dict.update(ap_dict)

    logger.info('result is saved to: %s' % result_dir)
    return ret_dict
def pc_cb(self, data):
    pts_input = self.extract_networks_input_from_pc2rosmsg(data)
    if self.pc_pub is not None:
        self.pc_pub.publish(numpy2pc2(pts_input, data.header.frame_id))
    np.random.seed(666)
    with torch.no_grad():
        # prepare the input data
        MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
        inputs = torch.unsqueeze(inputs, 0)

        # model inference
        input_data = {'pts_input': inputs}
        ret_dict = self.model(input_data)

        # parse the results
        batch_size = 1
        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M) proposal confidence predictions
        roi_boxes3d = ret_dict['rois']  # (B, M, 7) proposal boxes
        seg_result = ret_dict['seg_result'].long()  # (B, N) foreground point segmentation
        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1, ret_dict['rcnn_cls'].shape[1])  # (B, M, n) bin classification results
        rcnn_reg = ret_dict['rcnn_reg'].view(batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C) residual regression results

        # decode the 3D bounding boxes
        anchor_size = MEAN_SIZE
        pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                          rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

        # cfg.RCNN.SCORE_THRESH is the confidence threshold
        if rcnn_cls.shape[2] == 1:
            batch_raw_scores = rcnn_cls  # (B, M, 1)
            batch_norm_scores = torch.sigmoid(batch_raw_scores)  # (B, M, 1)
            batch_pred_classes = (batch_norm_scores > cfg.RCNN.SCORE_THRESH).long()  # (B, M, 1)
        else:
            batch_pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            batch_raw_scores = rcnn_cls[:, batch_pred_classes]
            batch_norm_scores = F.softmax(rcnn_cls, dim=1)[:, batch_pred_classes]

        # scores threshold
        inds = batch_norm_scores > cfg.RCNN.SCORE_THRESH
        for batch in range(batch_size):
            inds_in_each_batch = inds[batch].view(-1)
            if inds_in_each_batch.sum() == 0:
                # no 3D bbox above the threshold in this batch
                continue
            pred_boxes3d_in_each_batch = pred_boxes3d[batch, inds_in_each_batch]
            raw_scores_in_each_batch = batch_raw_scores[batch, inds_in_each_batch]
            norm_scores_in_each_batch = batch_norm_scores[batch, inds_in_each_batch]

            # non-maximum suppression
            boxes_bev_in_each_batch = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_in_each_batch)
            keep_idx = iou3d_utils.nms_gpu(boxes_bev_in_each_batch, raw_scores_in_each_batch,
                                           cfg.RCNN.NMS_THRESH).view(-1)
            pred_boxes3d_in_each_batch = pred_boxes3d_in_each_batch[keep_idx]
            raw_scores_in_each_batch = raw_scores_in_each_batch[keep_idx]

            output = {'boxes3d': pred_boxes3d_in_each_batch.cpu().numpy(),
                      'scores': raw_scores_in_each_batch.cpu().numpy()}
            self.visualize(output, data.header.frame_id)
def eval_one_epoch_joint_single_file(model, file_path, result_dir, logger):
    np.random.seed(666)
    input_list = get_lidar(file_path)
    # Load the mean size of the class from the cfg YAML file
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    # Always TEST mode here: a single file has no labels to evaluate against
    mode = 'TEST'
    filename = file_path.split('/')[-1].split('.')[0]

    # Make output directory result_dir/final_result/data
    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    # Save intermediate results if args.save_result is set (currently the default)
    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    # logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    # input_data = input_data.cuda()
    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0
    # No dataloader loop here: this variant processes a single file.
    # cnt += 1
    # sample_id, pts_rect, pts_features, pts_input = data['sample_id'], data['pts_rect'], data['pts_features'], data['pts_input']
    # batch_size = len(sample_id)
    # inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
    pts_lidar = input_list[0]
    rem_pts = input_list[1]
    input_data = {'pts_input': torch.from_numpy(pts_lidar).view(1, -1, 3).float().cuda()}

    # model inference
    ret_dict = model(input_data)

    roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
    roi_boxes3d = ret_dict['rois']  # (B, M, 7)
    seg_result = ret_dict['seg_result'].long()  # (B, N)

    rcnn_cls = ret_dict['rcnn_cls'].view(1, -1, ret_dict['rcnn_cls'].shape[1])
    rcnn_reg = ret_dict['rcnn_reg'].view(1, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

    # bounding box regression
    anchor_size = MEAN_SIZE
    if cfg.RCNN.SIZE_RES_ON_ROI:
        assert False

    pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                      rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                      anchor_size=anchor_size,
                                      loc_scope=cfg.RCNN.LOC_SCOPE,
                                      loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                      num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                      get_xz_fine=True,
                                      get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                      loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                      loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                      get_ry_fine=True).view(1, -1, 7)

    # scoring
    if rcnn_cls.shape[2] == 1:
        raw_scores = rcnn_cls  # (B, M, 1)
        norm_scores = torch.sigmoid(raw_scores)
        pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
    else:
        pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
        cls_norm_scores = F.softmax(rcnn_cls, dim=1)
        raw_scores = rcnn_cls[:, pred_classes]
        norm_scores = cls_norm_scores[:, pred_classes]

    if args.save_result:
        # save roi and refine results
        roi_boxes3d_np = roi_boxes3d.cpu().numpy()
        pred_boxes3d_np = pred_boxes3d.cpu().numpy()
        roi_scores_raw_np = roi_scores_raw.cpu().numpy()
        raw_scores_np = raw_scores.cpu().numpy()

        rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
        rpn_xyz_np = ret_dict['backbone_xyz'].cpu().numpy()
        # remap rect-camera axes (x, y, z) to (z, -x, -y) for the dump
        rpn_xyz_np = np.concatenate([rpn_xyz_np[0][:, 2].reshape(-1, 1),
                                     -rpn_xyz_np[0][:, 0].reshape(-1, 1),
                                     -rpn_xyz_np[0][:, 1].reshape(-1, 1)], axis=1).reshape(1, -1, 3)
        seg_result_np = seg_result.cpu().numpy()
        rem_pts = np.concatenate([rem_pts[:, 2].reshape(-1, 1),
                                  -rem_pts[:, 0].reshape(-1, 1),
                                  -rem_pts[:, 1].reshape(-1, 1)], axis=1)
        rest_lidar_pts = np.hstack((rem_pts,
                                    np.zeros(rem_pts.shape[0]).reshape(-1, 1),
                                    np.zeros(rem_pts.shape[0]).reshape(-1, 1))).reshape(1, -1, 5)
        output_data = np.concatenate((rpn_xyz_np,
                                      rpn_cls_np.reshape(1, -1, 1),
                                      seg_result_np.reshape(1, -1, 1)), axis=2)
        output_data = np.hstack((rest_lidar_pts, output_data))

        cur_sample_id = 0
        output_file = os.path.join(rpn_output_dir, filename + '.npy')
        np.save(output_file, output_data.astype(np.float32))

    # scores thresh
    inds = norm_scores > cfg.RCNN.SCORE_THRESH

    cur_inds = inds[0].view(-1)
    pred_boxes3d_selected = pred_boxes3d[0, cur_inds]
    raw_scores_selected = raw_scores[0, cur_inds]
    norm_scores_selected = norm_scores[0, cur_inds]

    # rotated NMS
    boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_selected)
    keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected,
                                   cfg.RCNN.NMS_THRESH).view(-1)
    pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
    scores_selected = raw_scores_selected[keep_idx]
    pred_boxes3d_selected, scores_selected = \
        pred_boxes3d_selected.cpu().numpy(), scores_selected.cpu().numpy()

    cur_sample_id = 0
    final_total += pred_boxes3d_selected.shape[0]
    save_kitti_format(cur_sample_id, pred_boxes3d_selected, final_output_dir,
                      scores_selected, filename)
    ret_dict = {}
    logger.info('final detections: %d' % final_total)
    return ret_dict
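# The (x, y, z) -> (z, -x, -y) shuffles above move points from the camera-rect
# frame the network uses (x right, y down, z forward) into a lidar-style frame
# (x forward, y left, z up) for the .npy dump. A tiny stand-alone version of
# the same permutation (rect_to_lidar_axes is an illustrative helper, not a
# repository function):
import numpy as np

def rect_to_lidar_axes(pts):
    x, y, z = pts[:, 0], pts[:, 1], pts[:, 2]
    return np.stack([z, -x, -y], axis=1)

pts_rect = np.array([[1.0, -2.0, 10.0]])  # 1m right, 2m up, 10m ahead
print(rect_to_lidar_axes(pts_rect))       # [[10. -1.  2.]]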
def forward(self, input_data):
    """
    :param input_data: input dict
    :return:
    """
    input_data2 = input_data.copy()
    pred_boxes3d_1st = input_data2['pred_boxes3d_1st']
    ret_dict = {}
    batch_size = input_data['roi_boxes3d'].size(0)

    if self.training:
        input_data2['roi_boxes3d'] = pred_boxes3d_1st
        with torch.no_grad():
            target_dict_2nd = self.proposal_target_layer(input_data2, stage=2)
        pts_input_2 = torch.cat((target_dict_2nd['sampled_pts'],
                                 target_dict_2nd['pts_feature']), dim=2)
        target_dict_2nd['pts_input'] = pts_input_2
        roi = target_dict_2nd['roi_boxes3d']
        # roi = pred_boxes3d_1st
    else:
        input_data2['roi_boxes3d'] = pred_boxes3d_1st
        # input_data2['roi_boxes3d'] = torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
        roi = pred_boxes3d_1st
        # roi = torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1)
        pts_input_2 = self.roipooling(input_data2)

    xyz_2, features_2 = self._break_up_pc(pts_input_2)
    # print(xyz_2.size(), xyz.size(), features_2.size(), features.size())
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input_2 = pts_input_2[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature_2 = self.xyz_up_layer(xyz_input_2)
        rpn_feature_2 = pts_input_2[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1)
        merged_feature_2 = self.merge_down_layer(merged_feature_2)
        l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)]
    else:
        l_xyz_2, l_features_2 = [xyz_2], [features_2]
    # print(l_xyz_2[0].size(), l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())

    for i in range(len(self.SA_modules)):
        li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i], l_features_2[i])
        l_xyz_2.append(li_xyz_2)
        l_features_2.append(li_features_2)

    batch_size_2 = pts_input_2.shape[0]
    anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

    rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
    pre_iou2 = self.iou_layer(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)

    # loss
    if self.training:
        cls_label = target_dict_2nd['cls_label'].float()
        rcnn_cls_flat = rcnn_cls_2nd.view(-1)
        batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat),
                                                cls_label.view(-1), reduction='none')
        cls_label_flat = cls_label.view(-1)
        cls_valid_mask = (cls_label_flat >= 0).float()
        rcnn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)

        gt_boxes3d_ct = target_dict_2nd['gt_of_rois']
        reg_valid_mask = target_dict_2nd['reg_valid_mask']
        fg_mask = (reg_valid_mask > 0)
        # fall back to the background mask when no foreground ROI survives
        if rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
            fg_mask = (reg_valid_mask <= 0)
        loss_loc, loss_angle, loss_size, reg_loss_dict = \
            loss_utils.get_reg_loss(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask],
                                    gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                    loc_scope=cfg.RCNN.LOC_SCOPE,
                                    loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                    num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                    anchor_size=anchor_size,
                                    get_xz_fine=True,
                                    get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                    loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                    loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                    get_ry_fine=True)
        rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size
        two = {'rcnn_loss_cls_2nd': rcnn_loss_cls, 'rcnn_loss_reg_2nd': rcnn_loss_reg}
    else:
        two = {}
    sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}

    # print(input_data['roi_boxes3d'].shape, input_data2['roi_boxes3d'].shape)
    pred_boxes3d_2nd = decode_bbox_target(
        roi.view(-1, 7), rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
        anchor_size=anchor_size,
        loc_scope=cfg.RCNN.LOC_SCOPE,
        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
        get_xz_fine=True,
        get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
        loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
        get_ry_fine=True).view(batch_size, -1, 7)

    input_data3 = input_data.copy()
    if self.training:
        input_data3['roi_boxes3d'] = pred_boxes3d_2nd
        # print(input_data3['roi_boxes3d'].shape)
        with torch.no_grad():
            target_dict_3rd = self.proposal_target_layer(input_data3, stage=3)
        pts_input_3 = torch.cat((target_dict_3rd['sampled_pts'],
                                 target_dict_3rd['pts_feature']), dim=2)
        target_dict_3rd['pts_input'] = pts_input_3
        roi = target_dict_3rd['roi_boxes3d']
        # roi = pred_boxes3d_2nd
    else:
        input_data3['roi_boxes3d'] = pred_boxes3d_2nd
        # input_data3['roi_boxes3d'] = torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
        roi = pred_boxes3d_2nd
        # roi = torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
        pts_input_3 = self.roipooling(input_data3)

    xyz_3, features_3 = self._break_up_pc(pts_input_3)
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input_3 = pts_input_3[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature_3 = self.xyz_up_layer_3(xyz_input_3)
        rpn_feature_3 = pts_input_3[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
        merged_feature_3 = self.merge_down_layer_3(merged_feature_3)
        l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
    else:
        l_xyz_3, l_features_3 = [xyz_3], [features_3]

    for i in range(len(self.SA_modules_3)):
        li_xyz_3, li_features_3 = self.SA_modules_3[i](l_xyz_3[i], l_features_3[i])
        l_xyz_3.append(li_xyz_3)
        l_features_3.append(li_features_3)
    del xyz_2, features_2, l_features_2

    rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
    pre_iou3 = self.iou_layer(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)

    # loss
    if self.training:
        cls_label = target_dict_3rd['cls_label'].float()
        rcnn_cls_flat = rcnn_cls_3rd.view(-1)
        batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat),
                                                cls_label.view(-1), reduction='none')
        cls_label_flat = cls_label.view(-1)
        cls_valid_mask = (cls_label_flat >= 0).float()
        rcnn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)

        gt_boxes3d_ct = target_dict_3rd['gt_of_rois']
        reg_valid_mask = target_dict_3rd['reg_valid_mask']
        fg_mask = (reg_valid_mask > 0)
        if rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
            fg_mask = (reg_valid_mask <= 0)
        loss_loc, loss_angle, loss_size, reg_loss_dict = \
            loss_utils.get_reg_loss(rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask],
                                    gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                    loc_scope=cfg.RCNN.LOC_SCOPE,
                                    loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                    num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                    anchor_size=anchor_size,
                                    get_xz_fine=True,
                                    get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                    loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                    loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                    get_ry_fine=True)
        rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size
        # three = {'rcnn_loss_cls_3rd': rcnn_loss_cls, 'rcnn_loss_reg_3rd': rcnn_loss_reg}
    else:
        three = {}

    pred_boxes3d_3rd = decode_bbox_target(
        roi.view(-1, 7), rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
        anchor_size=anchor_size,
        loc_scope=cfg.RCNN.LOC_SCOPE,
        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
        get_xz_fine=True,
        get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
        loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
        get_ry_fine=True).view(batch_size, -1, 7)

    if self.training:
        gt = target_dict_3rd['real_gt']
        iou_label = []
        for i in range(batch_size_2):
            iou_label.append(iou3d_utils.boxes_iou3d_gpu(
                pred_boxes3d_3rd.view(-1, 7)[i].view(1, 7), gt[i].view(1, 7)))
        iou_label = torch.cat(iou_label)
        iou_label = (iou_label - 0.5) * 2  # map IoU in [0, 1] to [-1, 1]
        iou_loss = F.mse_loss(pre_iou3[fg_mask], iou_label[fg_mask])
        # print(iou_loss.item())
        three = {'rcnn_loss_cls_3rd': rcnn_loss_cls,
                 'rcnn_loss_reg_3rd': rcnn_loss_reg,
                 'rcnn_iou_loss': iou_loss}
        del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, \
            rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask

    pre_iou3 = pre_iou3 / 2 + 0.5  # map predictions back to [0, 1]
    pre_iou2 = pre_iou2 / 2 + 0.5
    ret_dict = {'rcnn_cls_3rd': rcnn_cls_3rd,
                'rcnn_reg_3rd': rcnn_reg_3rd,
                'pred_boxes3d_1st': pred_boxes3d_1st,
                'pred_boxes3d_2nd': pred_boxes3d_2nd,
                'pred_boxes3d_3rd': pred_boxes3d_3rd,
                'pre_iou3': pre_iou3,
                'pre_iou2': pre_iou2}
    ret_dict.update(sec)
    ret_dict.update(two)
    ret_dict.update(three)

    return ret_dict
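# The IoU head above is trained on labels mapped from [0, 1] to [-1, 1] via
# (iou - 0.5) * 2, and its predictions are mapped back with pre_iou / 2 + 0.5.
# A quick check that the two affine maps are inverses:
import torch

iou = torch.tensor([0.0, 0.5, 0.7, 1.0])
label = (iou - 0.5) * 2        # tensor([-1.0000,  0.0000,  0.4000,  1.0000])
recovered = label / 2 + 0.5    # back to tensor([0.0000, 0.5000, 0.7000, 1.0000])
assert torch.allclose(recovered, iou)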
def forward(self, input_data):
    """
    :param input_data: input dict
    :return:
    """
    if cfg.RCNN.ROI_SAMPLE_JIT:
        if self.training:
            with torch.no_grad():
                target_dict = self.proposal_target_layer(input_data, stage=1)
            pts_input = torch.cat((target_dict['sampled_pts'], target_dict['pts_feature']), dim=2)
            target_dict['pts_input'] = pts_input
        else:
            rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data['rpn_features']
            batch_rois = input_data['roi_boxes3d']
            if cfg.RCNN.USE_INTENSITY:
                pts_extra_input_list = [input_data['rpn_intensity'].unsqueeze(dim=2),
                                        input_data['seg_mask'].unsqueeze(dim=2)]
            else:
                pts_extra_input_list = [input_data['seg_mask'].unsqueeze(dim=2)]

            if cfg.RCNN.USE_DEPTH:
                pts_depth = input_data['pts_depth'] / 70.0 - 0.5
                pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
            pts_extra_input = torch.cat(pts_extra_input_list, dim=2)

            pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
            pooled_features, pooled_empty_flag = \
                roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois,
                                              cfg.RCNN.POOL_EXTRA_WIDTH,
                                              sampled_pt_num=cfg.RCNN.NUM_POINTS)

            # canonical transformation
            batch_size = batch_rois.shape[0]
            roi_center = batch_rois[:, :, 0:3]
            pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
            for k in range(batch_size):
                pooled_features[k, :, :, 0:3] = kitti_utils.rotate_pc_along_y_torch(
                    pooled_features[k, :, :, 0:3], batch_rois[k, :, 6])

            pts_input = pooled_features.view(-1, pooled_features.shape[2], pooled_features.shape[3])
    else:
        pts_input = input_data['pts_input']
        target_dict = {}
        target_dict['pts_input'] = input_data['pts_input']
        target_dict['roi_boxes3d'] = input_data['roi_boxes3d']
        if self.training:
            target_dict['cls_label'] = input_data['cls_label']
            target_dict['reg_valid_mask'] = input_data['reg_valid_mask']
            target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct']

    xyz, features = self._break_up_pc(pts_input)
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature = self.xyz_up_layer(xyz_input)
        rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1)
        merged_feature = self.merge_down_layer(merged_feature)
        l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)]
    else:
        l_xyz, l_features = [xyz], [features]

    for i in range(len(self.SA_modules)):
        li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
        l_xyz.append(li_xyz)
        l_features.append(li_features)

    batch_size = input_data['roi_boxes3d'].size(0)
    rcnn_cls = self.cls_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg = self.reg_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)
    # tt = torch.rand(rcnn_reg.shape[0], rcnn_reg.shape[1]).cuda()
    # tt = self.test_layer(pts_input.permute((0, 2, 1)))
    # tt = tt[:, :, 0]
    # rcnn_reg = tt
    # rcnn_cls = tt[:, 0]
    # print(tt.size(), rcnn_cls.size())

    roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7)
    anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    # print(rcnn_reg.size(), roi_boxes3d.size())
    pred_boxes3d_1st = decode_bbox_target(
        roi_boxes3d.view(-1, 7), rcnn_reg.view(-1, rcnn_reg.shape[-1]),
        anchor_size=anchor_size,
        loc_scope=cfg.RCNN.LOC_SCOPE,
        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
        get_xz_fine=True,
        get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
        loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
        get_ry_fine=True).view(batch_size, -1, 7)
    # print(pred_boxes3d.size())  # (B, 64, 7)

    ## 2nd stage
    # print(input_data['roi_boxes3d'].size())
    input_data2 = input_data
    input_data2['roi_boxes3d'] = pred_boxes3d_1st
    # print(input_data['roi_boxes3d'].size())
    with torch.no_grad():
        target_dict_2nd = self.proposal_target_layer(input_data2, stage=2)
    pts_input_2 = torch.cat((target_dict_2nd['sampled_pts'], target_dict_2nd['pts_feature']), dim=2)
    target_dict_2nd['pts_input'] = pts_input_2

    xyz_2, features_2 = self._break_up_pc(pts_input_2)
    # print(xyz_2.size(), xyz.size(), features_2.size(), features.size())
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input_2 = pts_input_2[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature_2 = self.xyz_up_layer(xyz_input_2)
        rpn_feature_2 = pts_input_2[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1)
        merged_feature_2 = self.merge_down_layer(merged_feature_2)
        l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)]
    else:
        l_xyz_2, l_features_2 = [xyz_2], [features_2]
    # print(l_xyz_2[0].size(), l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())

    for i in range(len(self.SA_modules)):
        li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i], l_features_2[i])
        l_xyz_2.append(li_xyz_2)
        l_features_2.append(li_features_2)
    del xyz, features, l_features

    rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)

    # loss
    '''
    cls_label = target_dict_2nd['cls_label'].float()
    cls_label_flat = cls_label.view(-1)
    rcnn_cls_flat_2nd = rcnn_cls_2nd.view(-1)
    cls_valid_mask = (cls_label_flat >= 0).float()
    batch_loss_cls_2nd = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat_2nd), cls_label, reduction='none')
    rcnn_loss_cls_2nd = (batch_loss_cls_2nd * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)
    # rcnn_loss_cls_2nd.backward()
    '''
    sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}

    pred_boxes3d_2nd = decode_bbox_target(
        pred_boxes3d_1st.view(-1, 7), rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
        anchor_size=anchor_size,
        loc_scope=cfg.RCNN.LOC_SCOPE,
        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
        get_xz_fine=True,
        get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
        loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
        get_ry_fine=True).view(batch_size, -1, 7)

    ## 3rd stage
    # print(pred_boxes3d_2nd.size())
    input_data['roi_boxes3d'] = pred_boxes3d_2nd
    with torch.no_grad():
        target_dict_3rd = self.proposal_target_layer(input_data, stage=3)
    pts_input_3 = torch.cat((target_dict_3rd['sampled_pts'], target_dict_3rd['pts_feature']), dim=2)
    target_dict_3rd['pts_input'] = pts_input_3

    xyz_3, features_3 = self._break_up_pc(pts_input_3)
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input_3 = pts_input_3[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature_3 = self.xyz_up_layer(xyz_input_3)
        rpn_feature_3 = pts_input_3[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
        merged_feature_3 = self.merge_down_layer(merged_feature_3)
        l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
    else:
        l_xyz_3, l_features_3 = [xyz_3], [features_3]

    for i in range(len(self.SA_modules)):
        li_xyz_3, li_features_3 = self.SA_modules[i](l_xyz_3[i], l_features_3[i])
        l_xyz_3.append(li_xyz_3)
        l_features_3.append(li_features_3)
    del xyz_2, features_2, l_features_2

    rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)

    pred_boxes3d_3rd = decode_bbox_target(
        pred_boxes3d_2nd.view(-1, 7), rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
        anchor_size=anchor_size,
        loc_scope=cfg.RCNN.LOC_SCOPE,
        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
        get_xz_fine=True,
        get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
        loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
        get_ry_fine=True).view(batch_size, -1, 7)

    ret_dict = {'rcnn_cls': rcnn_cls, 'rcnn_reg': rcnn_reg,
                'rcnn_cls_3rd': rcnn_cls_3rd, 'rcnn_reg_3rd': rcnn_reg_3rd,
                'pred_boxes3d_1st': pred_boxes3d_1st,
                'pred_boxes3d_2nd': pred_boxes3d_2nd,
                'pred_boxes3d_3rd': pred_boxes3d_3rd}
    ret_dict.update(sec)
    if self.training:
        ret_dict.update(target_dict)
    return ret_dict
def forward(self, input_data): """ :param input_data: input dict :return: """ if cfg.RCNN.ROI_SAMPLE_JIT: if self.training: with torch.no_grad(): target_dict = self.proposal_target_layer(input_data, stage=1) pts_input = torch.cat( (target_dict['sampled_pts'], target_dict['pts_feature']), dim=2) target_dict['pts_input'] = pts_input else: rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data[ 'rpn_features'] batch_rois = input_data['roi_boxes3d'] if cfg.RCNN.USE_INTENSITY: pts_extra_input_list = [ input_data['rpn_intensity'].unsqueeze(dim=2), input_data['seg_mask'].unsqueeze(dim=2) ] else: pts_extra_input_list = [ input_data['seg_mask'].unsqueeze(dim=2) ] if cfg.RCNN.USE_DEPTH: pts_depth = input_data['pts_depth'] / 70.0 - 0.5 pts_extra_input_list.append(pts_depth.unsqueeze(dim=2)) pts_extra_input = torch.cat(pts_extra_input_list, dim=2) pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2) pooled_features, pooled_empty_flag = \ roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH, sampled_pt_num=cfg.RCNN.NUM_POINTS) # canonical transformation batch_size = batch_rois.shape[0] roi_center = batch_rois[:, :, 0:3] pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2) for k in range(batch_size): pooled_features[k, :, :, 0:3] = kitti_utils.rotate_pc_along_y_torch( pooled_features[k, :, :, 0:3], batch_rois[k, :, 6]) pts_input = pooled_features.view(-1, pooled_features.shape[2], pooled_features.shape[3]) else: pts_input = input_data['pts_input'] target_dict = {} target_dict['pts_input'] = input_data['pts_input'] target_dict['roi_boxes3d'] = input_data['roi_boxes3d'] if self.training: #input_data['ori_roi'] = torch.cat((input_data['ori_roi'], input_data['roi_boxes3d']), 1) target_dict['cls_label'] = input_data['cls_label'] target_dict['reg_valid_mask'] = input_data[ 'reg_valid_mask'].view(-1) target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct'] #print(pts_input.shape) pts_input = pts_input.view(-1, 512, 128 + self.rcnn_input_channel) xyz, features = self._break_up_pc(pts_input) anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda() if cfg.RCNN.USE_RPN_FEATURES: xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose( 1, 2).unsqueeze(dim=3) #xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2) xyz_feature = self.xyz_up_layer(xyz_input) rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose( 1, 2).unsqueeze(dim=3) merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1) merged_feature = self.merge_down_layer(merged_feature) l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)] else: l_xyz, l_features = [xyz], [features] for i in range(len(self.SA_modules)): li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) l_xyz.append(li_xyz) l_features.append(li_features) batch_size = input_data['roi_boxes3d'].size(0) batch_size_2 = pts_input.shape[0] # for loss fun #print(input_data['roi_boxes3d'].shape,pts_input.shape) rcnn_cls = self.cls_layer(l_features[-1]).transpose( 1, 2).contiguous().squeeze(dim=1) # (B*64, 1 or 2) rcnn_reg = self.reg_layer(l_features[-1]).transpose( 1, 2).contiguous().squeeze(dim=1) # (B*64, C) if self.training: roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7) cls_label = target_dict['cls_label'].float() rcnn_cls_flat = rcnn_cls.view(-1) batch_loss_cls = F.binary_cross_entropy( torch.sigmoid(rcnn_cls_flat), cls_label.view(-1), reduction='none') cls_label_flat = cls_label.view(-1) cls_valid_mask = (cls_label_flat >= 0).float() rcnn_loss_cls = (batch_loss_cls * 
cls_valid_mask).sum() / torch.clamp( cls_valid_mask.sum(), min=1.0) gt_boxes3d_ct = target_dict['gt_of_rois'] reg_valid_mask = target_dict['reg_valid_mask'] fg_mask = (reg_valid_mask > 0) #print(rcnn_reg.view(batch_size_2, -1)[fg_mask].shape) loss_loc, loss_angle, loss_size, reg_loss_dict = \ loss_utils.get_reg_loss(rcnn_reg.view(batch_size_2, -1)[fg_mask], gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask], loc_scope=cfg.RCNN.LOC_SCOPE, loc_bin_size=cfg.RCNN.LOC_BIN_SIZE, num_head_bin=cfg.RCNN.NUM_HEAD_BIN, anchor_size=anchor_size, get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN, loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE, get_ry_fine=True) rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size one = { 'rcnn_loss_cls': rcnn_loss_cls, 'rcnn_loss_reg': rcnn_loss_reg } del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask else: roi_boxes3d = input_data['roi_boxes3d'].view(-1, 7) one = {} #print(rcnn_reg.size(),roi_boxes3d.size()) #print(roi_boxes3d.shape, rcnn_reg.shape) pred_boxes3d_1st = decode_bbox_target( roi_boxes3d.view(-1, 7), rcnn_reg.view(-1, rcnn_reg.shape[-1]), anchor_size=anchor_size, loc_scope=cfg.RCNN.LOC_SCOPE, loc_bin_size=cfg.RCNN.LOC_BIN_SIZE, num_head_bin=cfg.RCNN.NUM_HEAD_BIN, get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN, loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE, get_ry_fine=True).view(batch_size, -1, 7) if self.training == False and cfg.RCNN.ENABLED and not cfg.RPN.ENABLED: pred_boxes3d_1st = pred_boxes3d_1st.view(-1, 7) input_data2 = input_data.copy() #print(input_data['roi_boxes3d'].size()) if self.training: #input_data2['roi_boxes3d'] = torch.cat((pred_boxes3d_1st, input_data['ori_roi']), 1) input_data2['roi_boxes3d'] = torch.cat( (pred_boxes3d_1st, input_data['roi_boxes3d']), 1) #input_data2['roi_boxes3d'] = input_data['gt_boxes3d'] #input_data2['roi_boxes3d'] = pred_boxes3d_1st #print(input_data2['roi_boxes3d'].shape) with torch.no_grad(): target_dict_2nd = self.proposal_target_layer(input_data2, stage=2) ''' reg_valid_mask = target_dict_2nd['reg_valid_mask'] fg_mask_num2 = (reg_valid_mask > 0).sum() if fg_mask_num2< 10*batch_size: input_data2['roi_boxes3d'] = torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1) with torch.no_grad(): target_dict_2nd = self.proposal_target_layer(input_data2, stage=2) ''' pts_input_2 = torch.cat((target_dict_2nd['sampled_pts'], target_dict_2nd['pts_feature']), dim=2) target_dict_2nd['pts_input'] = pts_input_2 roi = target_dict_2nd['roi_boxes3d'] else: input_data2['roi_boxes3d'] = pred_boxes3d_1st #input_data2['roi_boxes3d']=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1) roi = pred_boxes3d_1st #roi=torch.cat((pred_boxes3d_1st, input_data['roi_boxes3d']), 1) pts_input_2 = self.roipooling(input_data2) #print(pts_input_2.shape) xyz_2, features_2 = self._break_up_pc(pts_input_2) #print(xyz_2.size(),xyz.size(),features_2.size(),features.size()) if cfg.RCNN.USE_RPN_FEATURES: xyz_input_2 = pts_input_2[..., 0:self.rcnn_input_channel].transpose( 1, 2).unsqueeze(dim=3) xyz_feature_2 = self.xyz_up_layer(xyz_input_2) rpn_feature_2 = pts_input_2[..., self.rcnn_input_channel:].transpose( 1, 2).unsqueeze(dim=3) merged_feature_2 = torch.cat((xyz_feature_2, rpn_feature_2), dim=1) merged_feature_2 = self.merge_down_layer(merged_feature_2) l_xyz_2, l_features_2 = [xyz_2], [merged_feature_2.squeeze(dim=3)] else: l_xyz__2, l_features_2 = [xyz_2], [features_2] #print(l_xyz_2[0].size(), 
            l_xyz[0].size(), l_features_2[0].size(), l_features[0].size())

    # run the shared set-abstraction (SA) backbone on the second-stage points
    for i in range(len(self.SA_modules)):
        li_xyz_2, li_features_2 = self.SA_modules[i](l_xyz_2[i], l_features_2[i])
        l_xyz_2.append(li_xyz_2)
        l_features_2.append(li_features_2)

    del xyz, features, l_features

    rcnn_cls_2nd = self.cls_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_2nd = self.reg_layer_2nd(l_features_2[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)

    # second-stage loss
    if self.training:
        cls_label = target_dict_2nd['cls_label'].float()
        rcnn_cls_flat = rcnn_cls_2nd.view(-1)
        batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat),
                                                cls_label.view(-1), reduction='none')
        cls_label_flat = cls_label.view(-1)
        cls_valid_mask = (cls_label_flat >= 0).float()
        # average the BCE only over valid labels (negative labels mark ignored RoIs)
        rcnn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)

        gt_boxes3d_ct = target_dict_2nd['gt_of_rois']
        reg_valid_mask = target_dict_2nd['reg_valid_mask']
        fg_mask = (reg_valid_mask > 0)
        # if no foreground RoI survived sampling, fall back to the background
        # RoIs so get_reg_loss never receives an empty tensor
        if rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
            fg_mask = (reg_valid_mask <= 0)

        loss_loc, loss_angle, loss_size, reg_loss_dict = \
            loss_utils.get_reg_loss(rcnn_reg_2nd.view(batch_size_2, -1)[fg_mask],
                                    gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                    loc_scope=cfg.RCNN.LOC_SCOPE,
                                    loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                    num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                    anchor_size=anchor_size,
                                    get_xz_fine=True,
                                    get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                    loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                    loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                    get_ry_fine=True)
        rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size
        two = {'rcnn_loss_cls_2nd': rcnn_loss_cls, 'rcnn_loss_reg_2nd': rcnn_loss_reg}
        del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, \
            rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask
    else:
        two = {}

    sec = {'rcnn_cls_2nd': rcnn_cls_2nd, 'rcnn_reg_2nd': rcnn_reg_2nd}

    # decode the second-stage residuals w.r.t. their RoIs into absolute boxes
    pred_boxes3d_2nd = decode_bbox_target(roi.view(-1, 7),
                                          rcnn_reg_2nd.view(-1, rcnn_reg_2nd.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

    ## 3rd stage
    input_data3 = input_data2.copy()
    if self.training:
        # concatenate the refined boxes with the previous-stage RoIs so the
        # target layer still has enough candidates to sample foreground from
        input_data3['roi_boxes3d'] = torch.cat((pred_boxes3d_2nd, input_data2['roi_boxes3d']), 1)
        with torch.no_grad():
            target_dict_3rd = self.proposal_target_layer(input_data3, stage=3)
        pts_input_3 = torch.cat((target_dict_3rd['sampled_pts'],
                                 target_dict_3rd['pts_feature']), dim=2)
        target_dict_3rd['pts_input'] = pts_input_3
        roi = target_dict_3rd['roi_boxes3d']
    else:
        input_data3['roi_boxes3d'] = pred_boxes3d_2nd
        roi = pred_boxes3d_2nd
        pts_input_3 = self.roipooling(input_data3)

    xyz_3, features_3 = self._break_up_pc(pts_input_3)
    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input_3 = pts_input_3[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature_3 = self.xyz_up_layer(xyz_input_3)
        rpn_feature_3 = pts_input_3[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature_3 = torch.cat((xyz_feature_3, rpn_feature_3), dim=1)
        merged_feature_3 = self.merge_down_layer(merged_feature_3)
        l_xyz_3, l_features_3 = [xyz_3], [merged_feature_3.squeeze(dim=3)]
    else:
        # note: these must be the _3 lists, since the SA loop below reads l_xyz_3/l_features_3
        l_xyz_3, l_features_3 = [xyz_3], [features_3]

    for i in range(len(self.SA_modules)):
        li_xyz_3, li_features_3 = self.SA_modules[i](l_xyz_3[i], l_features_3[i])
        l_xyz_3.append(li_xyz_3)
        l_features_3.append(li_features_3)

    del xyz_2, features_2, l_features_2

    rcnn_cls_3rd = self.cls_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, 1 or 2)
    rcnn_reg_3rd = self.reg_layer_3rd(l_features_3[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B*64, C)

    # third-stage loss (same form as the second stage)
    if self.training:
        cls_label = target_dict_3rd['cls_label'].float()
        rcnn_cls_flat = rcnn_cls_3rd.view(-1)
        batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat),
                                                cls_label.view(-1), reduction='none')
        cls_label_flat = cls_label.view(-1)
        cls_valid_mask = (cls_label_flat >= 0).float()
        rcnn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)

        gt_boxes3d_ct = target_dict_3rd['gt_of_rois']
        reg_valid_mask = target_dict_3rd['reg_valid_mask']
        fg_mask = (reg_valid_mask > 0)
        if rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask].size(0) == 0:
            fg_mask = (reg_valid_mask <= 0)

        loss_loc, loss_angle, loss_size, reg_loss_dict = \
            loss_utils.get_reg_loss(rcnn_reg_3rd.view(batch_size_2, -1)[fg_mask],
                                    gt_boxes3d_ct.view(batch_size_2, 7)[fg_mask],
                                    loc_scope=cfg.RCNN.LOC_SCOPE,
                                    loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                    num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                    anchor_size=anchor_size,
                                    get_xz_fine=True,
                                    get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                    loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                    loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                    get_ry_fine=True)
        rcnn_loss_reg = loss_loc + loss_angle + 3 * loss_size
        three = {'rcnn_loss_cls_3rd': rcnn_loss_cls, 'rcnn_loss_reg_3rd': rcnn_loss_reg}
        del cls_label, rcnn_cls_flat, batch_loss_cls, cls_label_flat, cls_valid_mask, \
            rcnn_loss_cls, gt_boxes3d_ct, reg_valid_mask, fg_mask
    else:
        three = {}

    pred_boxes3d_3rd = decode_bbox_target(roi.view(-1, 7),
                                          rcnn_reg_3rd.view(-1, rcnn_reg_3rd.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

    ret_dict = {'rcnn_cls': rcnn_cls, 'rcnn_reg': rcnn_reg,
                'rcnn_cls_3rd': rcnn_cls_3rd, 'rcnn_reg_3rd': rcnn_reg_3rd,
                'pred_boxes3d_1st': pred_boxes3d_1st,
                'pred_boxes3d_2nd': pred_boxes3d_2nd,
                'pred_boxes3d_3rd': pred_boxes3d_3rd}
    ret_dict.update(sec)
    ret_dict.update(one)
    ret_dict.update(two)
    ret_dict.update(three)
    if self.training:
        ret_dict.update(target_dict)
    return ret_dict
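# The three heads above share one pattern: stage k decodes its regression
# output as residuals on top of the boxes produced by stage k-1
# (pred_boxes3d_1st -> pred_boxes3d_2nd -> pred_boxes3d_3rd). Below is a
# minimal toy sketch of that chaining; make_toy_stage is a made-up stand-in
# for (SA backbone + reg head + decode_bbox_target), not real model code.
import torch

def make_toy_stage(shift):
    # each toy stage pretends its head asked to move x by `shift`
    def refine(boxes):                      # boxes: (B, M, 7)
        residual = torch.zeros_like(boxes)
        residual[..., 0] = shift
        return boxes + residual             # decode: previous boxes + residual
    return refine

boxes = torch.zeros(2, 4, 7)                # (B, M, 7) initial RoIs
outputs = {}
for stage, refine in enumerate([make_toy_stage(0.5), make_toy_stage(0.2),
                                make_toy_stage(0.1)], start=1):
    boxes = refine(boxes)                   # stage k refines stage k-1's boxes
    outputs['pred_boxes3d_stage%d' % stage] = boxes
print(outputs['pred_boxes3d_stage3'][0, 0, 0])  # tensor(0.8000): 0.5 + 0.2 + 0.1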
def _eval_data(self, masked_pts=None):
    """Evaluate the current batch, optionally on masked/sampled points, and
    return an AP-style score over car ground-truth boxes."""
    with torch.no_grad():
        MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
        batch_size = self.config['batch_size']

        # get valid points (projected points should fall inside the image)
        sample_id, pts_rect, pts_intensity, gt_boxes3d, npoints, labels = \
            self.data['sample_id'], self.data['pts_rect'], self.data['pts_intensity'], \
            self.data['gt_boxes3d'], self.data['npoints'], self.data['label']
        cls_types = [[labels[k][i].cls_type for i in range(len(labels[k]))]
                     for k in range(batch_size)]
        calib = [self.test_loader.dataset.get_calib(idx) for idx in sample_id]

        if self.use_masked:
            # use the masked/sampled points instead of the raw cloud
            pts_rect = np.array([c.lidar_to_rect(masked_pts[k][:, 0:3])
                                 for k, c in enumerate(calib)])
            pts_intensity = [masked_pts[k][:, 3] for k in range(batch_size)]
            npoints = masked_pts.shape[0]

        inputs = torch.from_numpy(pts_rect).cuda(non_blocking=True).float().view(
            self.config['batch_size'], -1, 3)
        gt_boxes3d = torch.from_numpy(gt_boxes3d).cuda(non_blocking=True)
        input_data = {'pts_input': inputs}

        # model inference
        ret_dict = self.model(input_data)
        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
        roi_boxes3d = ret_dict['rois']               # (B, M, 7)
        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1, ret_dict['rcnn_cls'].shape[1])
        rcnn_reg = ret_dict['rcnn_reg'].view(batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

        norm_scores = torch.sigmoid(rcnn_cls)
        # drop low-confidence predictions
        thresh_mask = norm_scores > cfg.RCNN.SCORE_THRESH

        # bounding box regression
        anchor_size = MEAN_SIZE
        pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                          rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

        # select boxes above the score threshold (list of tensors, one per scene)
        pred_boxes3d_selected = [pred_boxes3d[k][thresh_mask[k].view(-1)] for k in range(batch_size)]
        raw_scores_selected = [roi_scores_raw[k][thresh_mask[k].view(-1)] for k in range(batch_size)]
        norm_scores_selected = [norm_scores[k][thresh_mask[k].view(-1)] for k in range(batch_size)]

        # rotated NMS in bird's-eye view
        boxes_bev_selected = [kitti_utils.boxes3d_to_bev_torch(bboxes)
                              for bboxes in pred_boxes3d_selected]
        keep_idx = [iou3d_utils.nms_gpu(boxes_bev_selected[k], raw_scores_selected[k],
                                        cfg.RCNN.NMS_THRESH).view(-1)
                    for k in range(batch_size)]
        pred_boxes3d_selected = [pred_boxes3d_selected[k][keep_idx[k]] for k in range(batch_size)]
        scores_selected = [raw_scores_selected[k][keep_idx[k]] for k in range(batch_size)]
        norm_scores_selected = [norm_scores_selected[k][keep_idx[k]] for k in range(batch_size)]

        # keep only 'Car' ground-truth boxes
        keep_idx = [[i for i in range(len(cls_types[k])) if cls_types[k][i] == 'Car']
                    for k in range(batch_size)]
        gt_boxes3d_selected = [gt_boxes3d[k][keep_idx[k]] for k in range(batch_size)]

        # skip scenes that have no car ground truth at all
        has_info = [k for k in range(batch_size) if len(keep_idx[k]) > 0]
        gt_boxes3d_selected = [gt_boxes3d_selected[x] for x in has_info]
        pred_boxes3d_selected = [pred_boxes3d_selected[x] for x in has_info]
        batch_size = len(has_info)
        if batch_size == 0:
            return None

        # intersection over union between GT boxes (rows) and predictions (columns)
        iou3d = [iou3d_utils.boxes_iou3d_gpu(gt_boxes3d_selected[k], pred_boxes3d_selected[k])
                 for k in range(batch_size)]
        # max over dim=0: best IoU with any ground-truth box, per predicted box
        gt_max_iou = [torch.max(iou3d[k], dim=0)[0] for k in range(batch_size)]

        # precision at each rank (used to compute an interpolated AP)
        precision_vals = []
        for k in range(batch_size):
            batch_iou = gt_max_iou[k]
            batch_precision = []
            num_correct = 0
            for i in range(len(batch_iou)):
                if batch_iou[i] > 0.7:
                    num_correct += 1
                batch_precision.append(num_correct / (i + 1))
            precision_vals.append(batch_precision)

        aps = []
        for k in range(batch_size):
            batch_prec = precision_vals[k]
            ap = 0
            for i in range(len(batch_prec)):
                # interpolated precision: best precision at this rank or later
                ap += max(batch_prec[i:])
            aps.append(ap)

        num_gt_boxes = sum([len(gt_max_iou[k]) for k in range(batch_size)])
        return sum(aps) / num_gt_boxes
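# The AP above is a sum of interpolated precisions: a detection counts as
# correct when its best IoU with any GT exceeds 0.7, precision is tracked at
# every rank, and each rank contributes the best precision achieved from that
# rank onward. A self-contained sketch with made-up IoU values:
ious = [0.80, 0.45, 0.75, 0.30]     # per-detection max IoU, in score order
precision, correct = [], 0
for rank, iou in enumerate(ious, start=1):
    if iou > 0.7:
        correct += 1
    precision.append(correct / rank)                 # precision@rank
ap = sum(max(precision[i:]) for i in range(len(precision)))
print(precision)    # [1.0, 0.5, 0.666..., 0.5]
print(ap)           # 1.0 + 2/3 + 2/3 + 0.5 = 2.833...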
def forward(self, input_data):
    """
    :param input_data: input dict
    :return: ret_dict with classification, regression, and decoded first-stage boxes
    """
    if cfg.RCNN.ROI_SAMPLE_JIT:
        if self.training:
            with torch.no_grad():
                target_dict = self.proposal_target_layer(input_data)
            pts_input = torch.cat((target_dict['sampled_pts'],
                                   target_dict['pts_feature']), dim=2)
            target_dict['pts_input'] = pts_input
        else:
            rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data['rpn_features']
            batch_rois = input_data['roi_boxes3d']
            if cfg.RCNN.USE_INTENSITY:
                pts_extra_input_list = [input_data['rpn_intensity'].unsqueeze(dim=2),
                                        input_data['seg_mask'].unsqueeze(dim=2)]
            else:
                pts_extra_input_list = [input_data['seg_mask'].unsqueeze(dim=2)]
            if cfg.RCNN.USE_DEPTH:
                pts_depth = input_data['pts_depth'] / 70.0 - 0.5
                pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
            pts_extra_input = torch.cat(pts_extra_input_list, dim=2)

            pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
            pooled_features, pooled_empty_flag = \
                roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois,
                                              cfg.RCNN.POOL_EXTRA_WIDTH,
                                              sampled_pt_num=cfg.RCNN.NUM_POINTS)

            # canonical transformation: shift and rotate each RoI into its own local frame
            batch_size = batch_rois.shape[0]
            roi_center = batch_rois[:, :, 0:3]
            pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)
            for k in range(batch_size):
                pooled_features[k, :, :, 0:3] = kitti_utils.rotate_pc_along_y_torch(
                    pooled_features[k, :, :, 0:3], batch_rois[k, :, 6])

            pts_input = pooled_features.view(-1, pooled_features.shape[2],
                                             pooled_features.shape[3])
    else:
        pts_input = input_data['pts_input']
        target_dict = {}
        target_dict['pts_input'] = input_data['pts_input']
        target_dict['roi_boxes3d'] = input_data['roi_boxes3d']
        if self.training:
            target_dict['cls_label'] = input_data['cls_label']
            target_dict['reg_valid_mask'] = input_data['reg_valid_mask']
            target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct']

    xyz, features = self._break_up_pc(pts_input)
    batch_size = input_data['roi_boxes3d'].size(0)

    if cfg.RCNN.USE_RPN_FEATURES:
        xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3)
        xyz_feature = self.xyz_up_layer(xyz_input)
        rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3)
        merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1)
        merged_feature = self.merge_down_layer(merged_feature)
        l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)]
    else:
        l_xyz, l_features = [xyz], [features]

    for i in range(len(self.SA_modules)):
        li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
        l_xyz.append(li_xyz)
        l_features.append(li_features)

    rcnn_cls = self.cls_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, 1 or 2)
    rcnn_reg = self.reg_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, C)

    anchor_size = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    if self.training:
        roi_boxes3d = target_dict['roi_boxes3d'].view(-1, 7)
    else:
        roi_boxes3d = input_data['roi_boxes3d']

    pred_boxes3d_1st = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                          rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

    ret_dict = {'rcnn_cls': rcnn_cls, 'rcnn_reg': rcnn_reg,
                'pred_boxes3d_1st': pred_boxes3d_1st}
    ret_dict['pooled_feature'] = l_features[-1]

    if cfg.TRAIN.IOU_LAYER == 'split' and self.training:
        gt = target_dict['real_gt']
        iou_label = []
        batch_size_2 = pts_input.shape[0]
        for i in range(batch_size_2):
            iou_label.append(iou3d_utils.boxes_iou3d_gpu(
                pred_boxes3d_1st.view(-1, 7)[i].view(1, 7), gt[i].view(1, 7)))
        iou_label = torch.cat(iou_label)
        # rescale IoU from [0, 1] to [-1, 1] so that IoU 0.5 maps to 0
        iou_label = (iou_label - 0.5) * 2
        ret_dict['iou_label'] = iou_label

    if self.training:
        ret_dict.update(target_dict)
    return ret_dict
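# The iou_label target above maps IoU from [0, 1] into [-1, 1], centered so
# that IoU 0.5 becomes 0. A tiny sketch with made-up IoU values, including
# the inverse mapping that would turn a predicted label back into an IoU:
import torch

iou = torch.tensor([0.0, 0.25, 0.5, 0.9])
iou_label = (iou - 0.5) * 2
print(iou_label)                # tensor([-1.0000, -0.5000,  0.0000,  0.8000])
print(iou_label / 2 + 0.5)      # recovers the original IoUs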
def eval_one_epoch_joint(model, dataloader, epoch_id, result_dir, logger):
    np.random.seed(666)
    # load the per-class mean size from the cfg YAML file
    MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    # mode is TEST when --test is set, otherwise EVAL
    mode = 'TEST' if args.test else 'EVAL'

    # make output directory result_dir/final_result/data
    final_output_dir = os.path.join(result_dir, 'final_result', 'data')
    os.makedirs(final_output_dir, exist_ok=True)

    # optionally save intermediate RPN / RoI / refinement results
    if args.save_result:
        roi_output_dir = os.path.join(result_dir, 'roi_result', 'data')
        refine_output_dir = os.path.join(result_dir, 'refine_result', 'data')
        rpn_output_dir = os.path.join(result_dir, 'rpn_result', 'data')
        os.makedirs(rpn_output_dir, exist_ok=True)
        os.makedirs(roi_output_dir, exist_ok=True)
        os.makedirs(refine_output_dir, exist_ok=True)

    logger.info('---- EPOCH %s JOINT EVALUATION ----' % epoch_id)
    logger.info('==> Output file: %s' % result_dir)
    model.eval()

    # IoU thresholds for recall
    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    total_recalled_bbox_list, total_gt_bbox = [0] * 5, 0
    total_roi_recalled_bbox_list = [0] * 5
    dataset = dataloader.dataset
    lidar_idx_table = dataset.lidar_idx_table
    cnt = final_total = total_cls_acc = total_cls_acc_refined = total_rpn_iou = 0

    progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval')

    for data in dataloader:
        cnt += 1
        sample_id, pts_rect, pts_features, pts_input = \
            data['sample_id'], data['pts_rect'], data['pts_features'], data['pts_input']
        batch_size = len(sample_id)
        inputs = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
        input_data = {'pts_input': inputs}

        # model inference
        ret_dict = model(input_data)

        roi_scores_raw = ret_dict['roi_scores_raw']  # (B, M)
        roi_boxes3d = ret_dict['rois']               # (B, M, 7)
        seg_result = ret_dict['seg_result'].long()   # (B, N)

        rcnn_cls = ret_dict['rcnn_cls'].view(batch_size, -1, ret_dict['rcnn_cls'].shape[1])
        rcnn_reg = ret_dict['rcnn_reg'].view(batch_size, -1, ret_dict['rcnn_reg'].shape[1])  # (B, M, C)

        # bounding box regression
        anchor_size = MEAN_SIZE
        if cfg.RCNN.SIZE_RES_ON_ROI:
            assert False

        pred_boxes3d = decode_bbox_target(roi_boxes3d.view(-1, 7),
                                          rcnn_reg.view(-1, rcnn_reg.shape[-1]),
                                          anchor_size=anchor_size,
                                          loc_scope=cfg.RCNN.LOC_SCOPE,
                                          loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
                                          num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
                                          get_xz_fine=True,
                                          get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
                                          loc_y_scope=cfg.RCNN.LOC_Y_SCOPE,
                                          loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
                                          get_ry_fine=True).view(batch_size, -1, 7)

        # scoring
        if rcnn_cls.shape[2] == 1:
            raw_scores = rcnn_cls  # (B, M, 1)
            norm_scores = torch.sigmoid(raw_scores)
            pred_classes = (norm_scores > cfg.RCNN.SCORE_THRESH).long()
        else:
            pred_classes = torch.argmax(rcnn_cls, dim=1).view(-1)
            cls_norm_scores = F.softmax(rcnn_cls, dim=1)
            raw_scores = rcnn_cls[:, pred_classes]
            norm_scores = cls_norm_scores[:, pred_classes]

        # evaluation
        recalled_num = gt_num = rpn_iou = 0
        if not args.test:
            if not cfg.RPN.FIXED:
                rpn_cls_label, rpn_reg_label = data['rpn_cls_label'], data['rpn_reg_label']
                rpn_cls_label = torch.from_numpy(rpn_cls_label).cuda(non_blocking=True).long()

            gt_boxes3d = data['gt_boxes3d']
            gt_boxes3d = filtrate_gtboxes(gt_boxes3d)

            for k in range(batch_size):
                # calculate recall; trailing all-zero rows are padding
                cur_gt_boxes3d = gt_boxes3d[k]
                tmp_idx = cur_gt_boxes3d.__len__() - 1
                while tmp_idx >= 0 and cur_gt_boxes3d[tmp_idx].sum() == 0:
                    tmp_idx -= 1
                if tmp_idx >= 0:
                    cur_gt_boxes3d = cur_gt_boxes3d[:tmp_idx + 1]
                    cur_gt_boxes3d = torch.from_numpy(cur_gt_boxes3d).cuda(
                        non_blocking=True).float()

                    iou3d = iou3d_utils.boxes_iou3d_gpu(pred_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou, _ = iou3d.max(dim=0)
                    refined_iou, _ = iou3d.max(dim=1)

                    for idx, thresh in enumerate(thresh_list):
                        total_recalled_bbox_list[idx] += (gt_max_iou > thresh).sum().item()
                    recalled_num += (gt_max_iou > 0.7).sum().item()
                    gt_num += cur_gt_boxes3d.shape[0]
                    total_gt_bbox += cur_gt_boxes3d.shape[0]

                    # recall of the original RoIs, before refinement
                    iou3d_in = iou3d_utils.boxes_iou3d_gpu(roi_boxes3d[k], cur_gt_boxes3d)
                    gt_max_iou_in, _ = iou3d_in.max(dim=0)
                    for idx, thresh in enumerate(thresh_list):
                        total_roi_recalled_bbox_list[idx] += (gt_max_iou_in > thresh).sum().item()

            if not cfg.RPN.FIXED:
                fg_mask = rpn_cls_label > 0
                correct = ((seg_result == rpn_cls_label) & fg_mask).sum().float()
                union = fg_mask.sum().float() + (seg_result > 0).sum().float() - correct
                rpn_iou = correct / torch.clamp(union, min=1.0)
                total_rpn_iou += rpn_iou.item()

        disp_dict = {'mode': mode,
                     'recall': '%d/%d' % (total_recalled_bbox_list[3], total_gt_bbox)}
        progress_bar.set_postfix(disp_dict)
        progress_bar.update()

        if args.save_result:
            # save roi and refine results
            roi_boxes3d_np = roi_boxes3d.cpu().numpy()
            pred_boxes3d_np = pred_boxes3d.cpu().numpy()
            roi_scores_raw_np = roi_scores_raw.cpu().numpy()
            raw_scores_np = raw_scores.cpu().numpy()
            rpn_cls_np = ret_dict['rpn_cls'].cpu().numpy()
            # map backbone points from the KITTI frame back to the Argoverse frame
            rpn_xyz_np = np.dot(np.linalg.inv(argo_to_kitti),
                                ret_dict['backbone_xyz'].cpu().numpy()[0].T).T.reshape(1, -1, 3)
            seg_result_np = seg_result.cpu().numpy()
            output_data = np.concatenate((rpn_xyz_np,
                                          rpn_cls_np.reshape(batch_size, -1, 1),
                                          seg_result_np.reshape(batch_size, -1, 1)), axis=2)

            for k in range(batch_size):
                cur_sample_id = sample_id[k]
                save_argo_format(cur_sample_id, roi_boxes3d_np[k], roi_output_dir,
                                 roi_scores_raw_np[k], lidar_idx_table)
                save_argo_format(cur_sample_id, pred_boxes3d_np[k], refine_output_dir,
                                 raw_scores_np[k], lidar_idx_table)
                output_file = os.path.join(rpn_output_dir,
                                           lidar_idx_table['%06d' % cur_sample_id] + '.npy')
                np.save(output_file, output_data.astype(np.float32))

        # score threshold
        inds = norm_scores > cfg.RCNN.SCORE_THRESH
        for k in range(batch_size):
            cur_inds = inds[k].view(-1)
            if cur_inds.sum() == 0:
                continue

            pred_boxes3d_selected = pred_boxes3d[k, cur_inds]
            raw_scores_selected = raw_scores[k, cur_inds]
            norm_scores_selected = norm_scores[k, cur_inds]

            # rotated NMS in bird's-eye view
            boxes_bev_selected = kitti_utils.boxes3d_to_bev_torch(pred_boxes3d_selected)
            keep_idx = iou3d_utils.nms_gpu(boxes_bev_selected, raw_scores_selected,
                                           cfg.RCNN.NMS_THRESH).view(-1)
            pred_boxes3d_selected = pred_boxes3d_selected[keep_idx]
            scores_selected = raw_scores_selected[keep_idx]
            pred_boxes3d_selected, scores_selected = \
                pred_boxes3d_selected.cpu().numpy(), scores_selected.cpu().numpy()

            cur_sample_id = sample_id[k]
            final_total += pred_boxes3d_selected.shape[0]
            save_argo_format(cur_sample_id, pred_boxes3d_selected, final_output_dir,
                             scores_selected, lidar_idx_table)

    progress_bar.close()

    ret_dict = {}
    logger.info('-------------------performance of epoch %s---------------------' % epoch_id)
    logger.info(str(datetime.now()))

    avg_rpn_iou = total_rpn_iou / max(cnt, 1.0)
    avg_cls_acc = total_cls_acc / max(cnt, 1.0)
    avg_cls_acc_refined = total_cls_acc_refined / max(cnt, 1.0)
    avg_det_num = final_total / max(len(dataset), 1.0)
    logger.info('final average detections: %.3f' % avg_det_num)
    logger.info('final average rpn_iou refined: %.3f' % avg_rpn_iou)
    logger.info('final average cls acc: %.3f' % avg_cls_acc)
    logger.info('final average cls acc refined: %.3f' % avg_cls_acc_refined)
    ret_dict['rpn_iou'] = avg_rpn_iou
    ret_dict['rcnn_cls_acc'] = avg_cls_acc
    ret_dict['rcnn_cls_acc_refined'] = avg_cls_acc_refined
    ret_dict['rcnn_avg_num'] = avg_det_num

    for idx, thresh in enumerate(thresh_list):
        cur_roi_recall = total_roi_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_roi_recalled_bbox_list[idx], total_gt_bbox, cur_roi_recall))
        ret_dict['rpn_recall(thresh=%.2f)' % thresh] = cur_roi_recall

    for idx, thresh in enumerate(thresh_list):
        cur_recall = total_recalled_bbox_list[idx] / max(total_gt_bbox, 1.0)
        logger.info('total bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh, total_recalled_bbox_list[idx], total_gt_bbox, cur_recall))
        ret_dict['rcnn_recall(thresh=%.2f)' % thresh] = cur_recall

    logger.info('result is saved to: %s' % result_dir)
    return ret_dict
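# The recall bookkeeping above counts, per IoU threshold, how many GT boxes
# are matched by at least one prediction: take each GT box's best IoU over
# all predictions, then threshold. A self-contained sketch; the IoU matrix is
# made up, whereas in the real code it comes from
# iou3d_utils.boxes_iou3d_gpu(pred_boxes3d[k], cur_gt_boxes3d):
import torch

thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
total_recalled = [0] * len(thresh_list)
total_gt = 0

iou3d = torch.tensor([[0.82, 0.10, 0.40],   # (num_pred, num_gt)
                      [0.05, 0.65, 0.55]])
gt_max_iou, _ = iou3d.max(dim=0)            # best prediction per GT: [0.82, 0.65, 0.55]

for idx, thresh in enumerate(thresh_list):
    total_recalled[idx] += (gt_max_iou > thresh).sum().item()
total_gt += iou3d.shape[1]

for thresh, recalled in zip(thresh_list, total_recalled):
    print('recall(thresh=%.1f): %d / %d' % (thresh, recalled, total_gt))
# thresholds 0.1/0.3/0.5 recall all three GTs, 0.7 recalls one, 0.9 none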