def im_detect_2d(model, data, eval_config, im_orig=None):
    """Run one 2D-detection forward pass and decode the RCNN outputs.

    Args:
        model: network exposing ``target_assigner.bbox_coder`` and,
            optionally, ``get_feat()`` for feature-map visualization.
        data: batch dict; must contain 'im_info' (per-image [h, w, scale]).
        eval_config: dict with keys 'feat_vis', 'rois_vis', 'bbox_reg',
            'class_agnostic', 'classes', 'batch_size'.
        im_orig: original image tensor, only used for ROI visualization.

    Returns:
        (pred_boxes, scores, rois[:, :, 1:5], anchors) — boxes rescaled back
        to the original image resolution.
    """
    im_info = data['im_info']
    # inference only — no autograd graph
    with torch.no_grad():
        prediction = model(data)

    # optional feature-map dump for debugging
    if eval_config.get('feat_vis'):
        featmaps_dict = model.get_feat()
        from utils.visualizer import FeatVisualizer
        feat_visualizer = FeatVisualizer()
        feat_visualizer.visualize_maps(featmaps_dict)

    cls_prob = prediction['rcnn_cls_probs']
    rois = prediction['rois_batch']
    bbox_pred = prediction['rcnn_bbox_preds']
    anchors = prediction['second_rpn_anchors'][0]
    scores = cls_prob
    im_scale = im_info[0][2]
    # rois layout is (batch_idx, x1, y1, x2, y2) — drop the batch index column
    boxes = rois.data[:, :, 1:5]
    if prediction.get('rois_scores') is not None:
        rois_scores = prediction['rois_scores']
        boxes = torch.cat([boxes, rois_scores], dim=2)

    # visualize rois
    if im_orig is not None and eval_config['rois_vis']:
        visualize_bbox(im_orig.numpy()[0], boxes.cpu().numpy()[0], save=True)
        # visualize_bbox(im_orig.numpy()[0], anchors[0].cpu().numpy()[:100], save=True)

    if eval_config['bbox_reg']:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        # if eval_config['bbox_normalize_targets_precomputed']:
        #     # Optionally normalize targets by a precomputed mean and stdev
        if not eval_config['class_agnostic']:
            # per-class regression: tile the rois once per class (+1 for bg)
            boxes = boxes.repeat(1, 1, len(eval_config['classes']) + 1)
        # box_deltas = box_deltas.view(
        #     -1, 4) * torch.FloatTensor(eval_config[
        #         'bbox_normalize_stds']).cuda() + torch.FloatTensor(
        #             eval_config['bbox_normalize_means']).cuda()
        # box_deltas = box_deltas.view(eval_config['batch_size'], -1, 4)
        # else:
        #     box_deltas = box_deltas.view(
        #         -1, 4) * torch.FloatTensor(eval_config[
        #             'bbox_normalize_stds']).cuda() + torch.FloatTensor(
        #                 eval_config['bbox_normalize_means']).cuda()
        #     box_deltas = box_deltas.view(eval_config['batch_size'], -1,
        #                                  4 * len(eval_config['classes']))
        # pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = model.target_assigner.bbox_coder.decode_batch(
            box_deltas.view(eval_config['batch_size'], -1, 4),
            boxes.view(eval_config['batch_size'], -1, 4))
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    # NOTE(review): if 'bbox_reg' is False, pred_boxes is never assigned and
    # the lines below raise NameError — presumably bbox_reg is always True
    # in practice; confirm or add a fallback to the raw rois.
    pred_boxes /= im_scale
    return pred_boxes, scores, rois[:, :, 1:5], anchors
def init_param(self, model_config):
    """Cache model_config entries as attributes and build submodules.

    Args:
        model_config: dict of model hyper-parameters; see the keys read
            below (a missing key raises KeyError).
    """
    classes = model_config['classes']
    self.classes = classes
    self.n_classes = len(classes)

    # Plain scalar options copied straight from the config, in a fixed
    # order so a missing key fails at the same point as before.
    for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                'crop_resize_with_max_pool', 'truncated', 'use_focal_loss',
                'subsample_twice', 'rcnn_batch_size'):
        setattr(self, key, model_config[key])

    # Sub-module configurations.
    self.feature_extractor_config = model_config['feature_extractor_config']
    self.rpn_config = model_config['rpn_config']

    # Target assigner and proposal sampler.
    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])
    self.sampler = BalancedSampler(model_config['sampler_config'])

    # self.reduce = model_config.get('reduce')
    self.reduce = True  # hard-coded on (config value intentionally ignored)
    self.visualizer = FeatVisualizer()
def init_param(self, model_config):
    """Cache model_config entries as attributes and build submodules.

    Also forces the target assigner's foreground IoU threshold according
    to whether 3D training is enabled (mutates ``model_config`` before
    constructing the assigner).

    Args:
        model_config: dict of model hyper-parameters; see the keys read
            below (a missing key raises KeyError).
    """
    classes = model_config['classes']
    self.classes = classes
    self.n_classes = len(classes)

    # Plain scalar options copied straight from the config, in a fixed
    # order so a missing key fails at the same point as before.
    for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                'crop_resize_with_max_pool', 'truncated', 'use_focal_loss',
                'subsample_twice', 'rcnn_batch_size'):
        setattr(self, key, model_config[key])

    # Sub-module configurations.
    self.feature_extractor_config = model_config['feature_extractor_config']
    self.rpn_config = model_config['rpn_config']

    # Proposal sampler.
    self.sampler = BalancedSampler(model_config['sampler_config'])

    # self.reduce = model_config.get('reduce')
    self.reduce = True  # hard-coded on (config value intentionally ignored)
    self.visualizer = FeatVisualizer()

    self.num_bins = 4
    self.train_3d = False
    self.train_2d = not self.train_3d

    # more accurate bbox for 3d prediction: tighten the fg threshold when
    # training 3D; the override must happen before the assigner is built.
    fg_thresh = 0.6 if self.train_3d else 0.5
    model_config['target_assigner_config']['fg_thresh'] = fg_thresh
    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])

    self.profiler = Profiler()
    self.h_cat = False
def test_corners_3d(self, dataloader, model, logger):
    """Run inference over *dataloader* and render per-image corner boxes.

    For each image: keeps only each proposal's argmax-class score,
    thresholds and NMS-filters per class, then draws the surviving 2D
    boxes plus their (2D or 3D) corner sets via ``self.visualizer``.
    Nothing is saved to disk; progress is written to stdout.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    end_time = 0
    for step, data in enumerate(dataloader):
        # start_time = time.time()
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]
        with torch.no_grad():
            prediction, _, _ = model(data)
        # duration_time = time.time() - start_time

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        # initialize dets for each classes
        # dets = [[] for class_ind in range(self.n_classes)]

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        # dims = prediction[constants.KEY_DIMS]
        corners_2d = prediction[constants.KEY_CORNERS_2D]
        # import ipdb
        # ipdb.set_trace()
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # rcnn_3d = prediction['rcnn_3d']
        batch_size = scores.shape[0]

        # Keep only the argmax class score per proposal; all other class
        # scores are zeroed so only the winning class can pass the thresh.
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        # if step == 6:
        #     import ipdb
        #     ipdb.set_trace()

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            # dims_per_img = dims[batch_ind]
            corners_2d_per_img = corners_2d[batch_ind]
            p2_per_img = p2[batch_ind]
            # last dim distinguishes 2D (2 cols) vs 3D (3 cols) corners
            num_cols = corners_2d.shape[-1]
            dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
            dets_2d = [np.zeros((0, 4), dtype=np.float32)]

            # class 0 is background — skip it
            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                threshed_scores_per_img = scores_per_img[inds, class_ind]
                if inds.numel() > 0:
                    # if self.class_agnostic:
                    threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                    # threshed_dims_per_img = dims_per_img[inds]
                    threshed_corners_2d_per_img = corners_2d_per_img[inds]
                    # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                    # else:
                    #     threshed_boxes_2d_per_img = boxes_2d_per_img[
                    #         inds, class_ind * 4:class_ind * 4 + 4]

                    # concat boxes and scores -> (x1, y1, x2, y2, score)
                    threshed_dets_per_img = torch.cat(
                        [
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                            # threshed_dims_per_img,
                        ],
                        dim=-1)

                    # sort by scores (descending), keeping corners aligned
                    _, order = torch.sort(threshed_scores_per_img, 0, True)
                    threshed_dets_per_img = threshed_dets_per_img[order]
                    threshed_corners_2d_per_img = threshed_corners_2d_per_img[
                        order]

                    # nms
                    keep = nms(threshed_dets_per_img[:, :4],
                               threshed_dets_per_img[:, 4],
                               self.nms).view(-1).long()
                    nms_dets_per_img = threshed_dets_per_img[keep].detach(
                    ).cpu().numpy()
                    nms_corners_2d_per_img = threshed_corners_2d_per_img[
                        keep].detach().cpu().numpy()

                    dets.append(nms_corners_2d_per_img)
                    dets_2d.append(nms_dets_per_img[:, :4])
                else:
                    dets.append(
                        np.zeros((0, 8, num_cols), dtype=np.float32))
                    dets_2d.append(np.zeros((0, 4)))

            # import ipdb
            # ipdb.set_trace()
            corners = np.concatenate(dets, axis=0)
            dets_2d = np.concatenate(dets_2d, axis=0)

            # NOTE(review): rebinding corners_2d/corners_3d here clobbers the
            # prediction tensor read above — harmless only because num_cols
            # was captured first and batch_size is presumably 1; confirm.
            corners_2d = None
            corners_3d = None
            if num_cols == 3:
                corners_3d = corners
            else:
                corners_2d = corners
            self.visualizer.render_image_corners_2d(
                image_path[0],
                boxes_2d=dets_2d,
                corners_2d=corners_2d,
                corners_3d=corners_3d,
                p2=p2_per_img.cpu().numpy())

        duration_time = time.time() - end_time
        # label_path = self._generate_label_path(image_path[batch_ind])
        # self.save_mono_3d_dets(dets, label_path)
        sys.stdout.write('\r{}/{},duration: {}'.format(
            step + 1, num_samples, duration_time))
        sys.stdout.flush()
        end_time = time.time()
def test_super_nms(self, dataloader, model, logger):
    """Run inference over *dataloader*, filter with super-NMS, save dets.

    Uses a fixed, very low score threshold (0.01) plus
    ``box_ops.super_nms`` instead of the usual thresh/NMS pair, then
    writes results via ``self.save_dets``. Progress goes to stdout.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    end_time = 0
    for step, data in enumerate(dataloader):
        # start_time = time.time()
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]
        with torch.no_grad():
            prediction = model(data)
        # duration_time = time.time() - start_time

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        # initialize dets for each classes
        # dets = [[] for class_ind in range(self.n_classes)]
        # leading empty list stands in for the background class
        dets = [[]]

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        batch_size = scores.shape[0]

        # (argmax-score masking disabled here, unlike the other test_* paths)
        # scores = scores.view(-1, self.n_classes)
        # new_scores = torch.zeros_like(scores)
        # _, scores_argmax = scores.max(dim=-1)
        # row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        # new_scores[row, scores_argmax] = scores[row, scores_argmax]
        # scores = new_scores.view(batch_size, -1, self.n_classes)

        # if step == 6:
        #     import ipdb
        #     ipdb.set_trace()

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            # class 0 is background — skip it
            for class_ind in range(1, self.n_classes):
                # cls thresh
                # import ipdb
                # ipdb.set_trace()
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > 0.01).view(-1)
                threshed_scores_per_img = scores_per_img[inds, class_ind]
                if inds.numel() > 0:
                    # if self.class_agnostic:
                    threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                    # else:
                    #     threshed_boxes_2d_per_img = boxes_2d_per_img[
                    #         inds, class_ind * 4:class_ind * 4 + 4]

                    # concat boxes and scores -> (x1, y1, x2, y2, score)
                    threshed_dets_per_img = torch.cat([
                        threshed_boxes_2d_per_img,
                        threshed_scores_per_img.unsqueeze(-1),
                    ],
                                                      dim=-1)

                    # sort by scores (descending)
                    _, order = torch.sort(threshed_scores_per_img, 0, True)
                    threshed_dets_per_img = threshed_dets_per_img[order]

                    # nms
                    # keep = nms(threshed_dets_per_img[:, :4],
                    #            threshed_dets_per_img[:, 4],
                    #            self.nms).view(-1).long()
                    keep = box_ops.super_nms(
                        threshed_dets_per_img[:, :4],
                        0.8,
                        nms_num=3,
                        loop_time=2)
                    nms_dets_per_img = threshed_dets_per_img[keep].detach(
                    ).cpu().numpy()

                    dets.append(nms_dets_per_img)
                else:
                    dets.append([])

        duration_time = time.time() - end_time
        # NOTE(review): dets accumulates across the whole batch but is saved
        # once under the last image's path — presumably batch_size is 1;
        # confirm against the dataloader config.
        label_path = self._generate_label_path(image_path[batch_ind])
        self.save_dets(dets, label_path)
        sys.stdout.write('\r{}/{},duration: {}'.format(
            step + 1, num_samples, duration_time))
        sys.stdout.flush()
        end_time = time.time()
def test_3d(self, dataloader, model, logger):
    """Run 3D-detection inference over *dataloader* and save KITTI dets.

    For each image: keeps only each proposal's argmax-class score,
    thresholds and NMS-filters per class, solves the 3D location from
    dims/2D-box/orientation via ``geometry_utils.calc_location``, and
    writes the result with ``self.save_mono_3d_dets``.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    end_time = 0
    for step, data in enumerate(dataloader):
        # start_time = time.time()
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]
        with torch.no_grad():
            prediction, _, _ = model(data)
        # duration_time = time.time() - start_time

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        # initialize dets for each classes
        # dets = [[] for class_ind in range(self.n_classes)]
        # leading empty list stands in for the background class
        dets = [[]]

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        dims = prediction[constants.KEY_DIMS]
        orients = prediction[constants.KEY_ORIENTS_V2]
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # rcnn_3d = prediction['rcnn_3d']
        batch_size = scores.shape[0]

        # Keep only the argmax class score per proposal; all other class
        # scores are zeroed so only the winning class can pass the thresh.
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        # if step == 6:
        #     import ipdb
        #     ipdb.set_trace()

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            dims_per_img = dims[batch_ind]
            orients_per_img = orients[batch_ind]
            p2_per_img = p2[batch_ind]
            # rcnn_3d_per_img = rcnn_3d[batch_ind]

            # class 0 is background — skip it
            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                threshed_scores_per_img = scores_per_img[inds, class_ind]
                if inds.numel() > 0:
                    # if self.class_agnostic:
                    threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                    threshed_dims_per_img = dims_per_img[inds]
                    threshed_orients_per_img = orients_per_img[inds]
                    # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                    # else:
                    #     threshed_boxes_2d_per_img = boxes_2d_per_img[
                    #         inds, class_ind * 4:class_ind * 4 + 4]

                    # concat boxes and scores ->
                    # (x1, y1, x2, y2, score, h, w, l, ry)
                    threshed_dets_per_img = torch.cat([
                        threshed_boxes_2d_per_img,
                        threshed_scores_per_img.unsqueeze(-1),
                        threshed_dims_per_img,
                        threshed_orients_per_img.unsqueeze(-1)
                    ],
                                                      dim=-1)

                    # sort by scores (descending)
                    _, order = torch.sort(threshed_scores_per_img, 0, True)
                    threshed_dets_per_img = threshed_dets_per_img[order]
                    # threshed_rcnn_3d_per_img = threshed_rcnn_3d_per_img[order]

                    # nms
                    keep = nms(threshed_dets_per_img[:, :4],
                               threshed_dets_per_img[:, 4],
                               self.nms).view(-1).long()
                    nms_dets_per_img = threshed_dets_per_img[keep].detach(
                    ).cpu().numpy()
                    # nms_rcnn_3d_per_img = threshed_rcnn_3d_per_img[keep].detach().cpu().numpy()

                    # calculate location: columns 5:8 are dims, :5 is the
                    # scored 2D box, 8 is the orientation angle
                    location = geometry_utils.calc_location(
                        nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                        nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

                    # import ipdb
                    # ipdb.set_trace()
                    # location, _ = mono_3d_postprocess_bbox(
                    #     nms_rcnn_3d_per_img, nms_dets_per_img[:, :5],
                    #     p2_per_img.cpu().numpy())

                    # final layout: 2D box + score, dims, location, ry
                    nms_dets_per_img = np.concatenate([
                        nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                        location, nms_dets_per_img[:, -1:]
                    ],
                                                      axis=-1)
                    # nms_dets_per_img = np.concatenate(
                    #     [nms_dets_per_img[:, :5], location], axis=-1)

                    dets.append(nms_dets_per_img)
                else:
                    dets.append([])

        duration_time = time.time() - end_time
        # NOTE(review): dets accumulates across the whole batch but is saved
        # once under the last image's path — presumably batch_size is 1;
        # confirm against the dataloader config.
        label_path = self._generate_label_path(image_path[batch_ind])
        self.save_mono_3d_dets(dets, label_path)
        sys.stdout.write('\r{}/{},duration: {}'.format(
            step + 1, num_samples, duration_time))
        sys.stdout.flush()
        end_time = time.time()
def oft_test(eval_config, data_loader, model):
    """Run OFT-style 3D detection over *data_loader* and save KITTI dets.

    Only one image in batch is supported.

    BUGFIX: the per-class loop previously reassigned ``pred_boxes_3d``
    (the full prediction tensor) to the class-filtered subset, so every
    class after the first indexed the already-filtered tensor with indices
    computed against the full score tensor — wrong boxes or an index error
    whenever len(classes) > 2. The filtered boxes now live in a
    class-local variable, leaving ``pred_boxes_3d`` intact.

    Args:
        eval_config: dict with 'classes', 'thresh', 'class_agnostic',
            'feat_vis', 'eval_out'.
        data_loader: iterable of batch dicts ('img_name', 'orig_p2', ...).
        model: network; optionally exposes ``get_feat()``.
    """
    # import ipdb
    # ipdb.set_trace()
    num_samples = len(data_loader)
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()
        # inference only — no autograd graph
        with torch.no_grad():
            data = to_cuda(data)
            prediction = model(data)

        # optional feature-map dump for debugging
        if eval_config.get('feat_vis'):
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        pred_probs_3d = prediction['pred_probs_3d']
        pred_boxes_3d = prediction['pred_boxes_3d']
        duration_time = time.time() - start_time

        # single-image batch: drop the batch dimension
        scores = pred_probs_3d.squeeze()
        pred_boxes_3d = pred_boxes_3d.squeeze()

        classes = eval_config['classes']
        thresh = eval_config['thresh']
        # NOTE(review): debug override — the configured thresh is ignored;
        # remove once the config value is trusted.
        thresh = 0.1

        dets = []
        # class 0 is background — skip it
        for j in range(1, len(classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores, order = torch.sort(cls_scores, 0, True)
                # select this class's boxes WITHOUT clobbering the full
                # pred_boxes_3d tensor (later classes still index into it)
                if eval_config['class_agnostic']:
                    cls_boxes_3d = pred_boxes_3d[inds, :]
                else:
                    cls_boxes_3d = pred_boxes_3d[inds][:, j * 4:(j + 1) * 4]
                cls_boxes_3d = cls_boxes_3d[order]

                # keep = nms(cls_boxes_3d, eval_config['nms'])
                # cls_boxes_3d = cls_boxes_3d[keep.view(-1).long()]

                cls_boxes_3d = cls_boxes_3d.detach().cpu().numpy()
                p2 = data['orig_p2'][0].detach().cpu().numpy()
                cls_scores = cls_scores.cpu().numpy()

                # project 3D boxes into the image to get scored 2D boxes
                cls_boxes = proj_3dTo2d(cls_boxes_3d, p2)
                cls_dets = np.concatenate(
                    (cls_boxes, cls_scores[..., np.newaxis]), 1)

                # img filter(ignore outside of image)
                img_filter = get_img_filter(cls_dets)
                final_dets = np.concatenate([cls_dets, cls_boxes_3d],
                                            axis=-1)
                final_dets = final_dets[img_filter]

                dets.append(final_dets)
            else:
                dets.append([])

        save_dets(dets, img_file[0], 'kitti', eval_config['eval_out'])

        sys.stdout.write('\r{}/{},duration: {}'.format(
            i + 1, num_samples, duration_time))
        sys.stdout.flush()