for j in range(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_classes): keep = np.where( all_boxes[j][i][:, -1] >= image_thresh)[0]
def forward(self, input):
    """Generate RPN object proposals from per-cell scores and box deltas.

    Args:
        input: 4-tuple of
            input[0]: class scores, shape (N, 2*A, H, W); the first A
                channels are background probs, the last A foreground probs.
            input[1]: bbox deltas, shape (N, 4*A, H, W).
            input[2]: im_info rows (height, width, scale) per image.
            input[3]: cfg key ('TRAIN'/'TEST') selecting the RPN settings.

    Returns:
        Tensor of shape (N, post_nms_topN, 5): column 0 is the batch index,
        columns 1:5 the proposal (x1, y1, x2, y2). Rows beyond the number
        of proposals kept for an image remain zero-padded.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # NOTE(review): min_size is read but the min-size filter below is
    # commented out, so it is currently unused.
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    feat_height, feat_width = scores.size(2), scores.size(3)
    # One (x, y, x, y) shift per feature-map cell, in input-image pixels.
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                   shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
    # Broadcast the A base anchors over the K cell shifts -> K*A anchors.
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)
    # proposals = clip_boxes_batch(proposals, im_info, batch_size)

    # assign the score to 0 if it's non keep.
    # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

    # trim keep index to make it euqal over batch
    # keep_idx = torch.cat(tuple(keep_idx), 0)

    # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
    # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
    # _, order = torch.sort(scores_keep, 1, True)

    scores_keep = scores
    proposals_keep = proposals
    # Sort each image's scores descending; order indexes into dim 1.
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        # NOTE(review): scores_keep.numel() counts scores across the WHOLE
        # batch, not just image i, so for batch_size > 1 this pre-NMS cut
        # can trigger even when image i has fewer than pre_nms_topN scores
        # (slicing then just keeps everything) — confirm intent.
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)

        keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                         nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds, im_info):
    # TODO create a new Function
    """Decode RPN outputs into per-image top proposals (sort / NMS / top-k).

    Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)
        im_info: per-image size info used to clip decoded boxes.

    Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4), zero-padded
            past the number of kept proposals.
        proposals_order: LongTensor, shape(N,post_nms_topN); for each kept
            proposal, its index into the original anchor/score ordering,
            -1 for padding slots.
    """
    # assert len(
    #     rpn_bbox_preds) == 1, 'just one feature maps is supported now'
    # rpn_bbox_preds = rpn_bbox_preds[0]
    # do not backward: proposal generation is not differentiated through.
    rpn_cls_probs = rpn_cls_probs.detach()
    rpn_bbox_preds = rpn_bbox_preds.detach()

    batch_size = rpn_bbox_preds.shape[0]
    coders = bbox_coders.build(
        self.target_generators.target_generator_config['coder_config'])
    proposals = coders.decode_batch(rpn_bbox_preds, anchors)

    # filer and clip
    proposals = box_ops.clip_boxes(proposals, im_info)

    # fg prob (channel 1 is foreground — per the class docstring layout)
    fg_probs = rpn_cls_probs[:, :, 1]

    # sort fg probabilities descending per image
    _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

    # fg_probs_batch = torch.zeros(batch_size,
    #                              self.post_nms_topN).type_as(rpn_cls_probs)
    proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                  4).type_as(rpn_bbox_preds)
    proposals_order = torch.zeros(
        batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

    for i in range(batch_size):
        proposals_single = proposals[i]
        fg_probs_single = fg_probs[i]
        fg_order_single = fg_probs_order[i]
        # pre nms: keep only the top-scoring candidates before NMS
        if self.pre_nms_topN > 0:
            fg_order_single = fg_order_single[:self.pre_nms_topN]
        proposals_single = proposals_single[fg_order_single]
        fg_probs_single = fg_probs_single[fg_order_single]

        # nms
        keep_idx_i = nms(proposals_single, fg_probs_single, self.nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # post nms
        if self.post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:self.post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        fg_probs_single = fg_probs_single[keep_idx_i]
        fg_order_single = fg_order_single[keep_idx_i]

        # padding 0 at the end.
        num_proposal = keep_idx_i.numel()
        proposals_batch[i, :num_proposal, :] = proposals_single
        # fg_probs_batch[i, :num_proposal] = fg_probs_single
        proposals_order[i, :num_proposal] = fg_order_single
    return proposals_batch, proposals_order
def select_boxes(rois, cls_prob, bbox_pred, im_infos, thresh=0.05,
                 max_per_image=5):
    """Select bounding boxes of objects from the predicted results of
    faster rcnn.

    Args:
        rois: (batch, num_rois, 5) proposals; columns 1:5 are the box
            coordinates — presumably column 0 is a batch index, TODO confirm.
        cls_prob: (batch, num_rois, n_classes) class probabilities.
        bbox_pred: (batch, num_rois, 4 * n_classes) per-class box deltas.
        im_infos: (batch, 3) image info rows used to clip boxes.
        thresh: minimum class score for a box to be considered.
        max_per_image: cap on detections kept per image (over all classes).

    Returns:
        List with one ndarray per image, shape (num_kept, 6):
        (x_min, y_min, x_max, y_max, score, class_id), sorted by score
        descending.
    """
    n_classes = cls_prob.shape[2]
    # Normalize boxes deltas by a mean and std (cfg BBOX_NORMALIZE_* values).
    # Built once here instead of once per image inside the loop.
    bbox_normalize_means = torch.Tensor((0.0, 0.0, 0.0, 0.0)).cuda()
    bbox_normalize_stds = torch.Tensor((0.1, 0.1, 0.2, 0.2)).cuda()

    all_boxes = []
    for i in range(rois.shape[0]):
        boxes = rois[i, :, 1:5].view(1, -1, 4)
        scores = cls_prob[i, :, :].view(1, -1, n_classes)
        box_deltas = bbox_pred[i, :, :].view(1, -1, 4 * n_classes)
        im_info = im_infos[i, :].view(1, 3)

        # Un-normalize the deltas before applying them to the rois.
        box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds \
            + bbox_normalize_means
        box_deltas = box_deltas.view(1, -1, 4 * n_classes)

        # Compute predicted boxes by predicted rois and the corresponding
        # box deltas, then clip them to the borders of the resized image.
        pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size=1)
        pred_boxes = clip_boxes(pred_boxes, im_info, batch_size=1)

        # pred_boxes.shape: (num_rois, 4 * n_classes)
        # scores.shape: (num_rois, n_classes)
        pred_boxes = pred_boxes.squeeze()
        scores = scores.squeeze()

        all_box = []
        # BUG FIX: this loop was hard-coded as range(1, 21) (the 21 Pascal
        # VOC classes) even though n_classes is computed above; use the
        # actual class count so other datasets work. Class 0 (background)
        # is skipped.
        for j in range(1, n_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, dim=0, descending=True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                # Concatenate boxes coordinates and class scores
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                # Non-maximum suppression (suppress boxes with IoU >= 0.3)
                # for selecting predicted boxes for each object class.
                keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
                cls_dets = cls_dets[keep.view(-1).long()]
                # Concatenate the class ids of boxes
                class_id = torch.ones(cls_dets.shape[0], 1,
                                      dtype=torch.float).cuda() * j
                cls_dets = torch.cat((cls_dets, class_id), dim=1)
                # Add each box (inner index renamed from `i`, which
                # shadowed the outer image index).
                for k in range(cls_dets.shape[0]):
                    all_box.append(cls_dets[k, :].cpu().detach().numpy())

        all_box = np.array(all_box)
        if all_box.shape[0] != 0:
            # Rank all boxes based on scores in the descending order
            index = all_box[:, 4].argsort()[::-1]
            all_box = all_box[index, :]
            # Limit to max_per_image detections *over all classes*
            if all_box.shape[0] > max_per_image:
                all_box = all_box[:max_per_image, :]

        # all_box.shape: (num_boxes, 6),
        # 6: (x_min, y_min, x_max, y_max, score, class_id)
        all_boxes.append(all_box)
    return all_boxes
def test_2d(self, dataloader, model, logger):
    """Run 2D detection over a dataloader and save per-image detections.

    For each sample: forward the model, keep only each row's argmax class
    score, threshold per class, sort, apply NMS, and write the surviving
    detections to a label file via self.save_dets.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    end_time = 0

    for step, data in enumerate(dataloader):
        # start_time = time.time()
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction, _, _ = model(data)
        # duration_time = time.time() - start_time

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        # initialize dets for each classes
        # dets = [[] for class_ind in range(self.n_classes)]
        dets = [[]]

        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]

        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        # Keep only each row's argmax class score; zero out the rest so a
        # box can only be detected as its single best class.
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        # if step == 6:
        #     import ipdb
        #     ipdb.set_trace()

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                threshed_scores_per_img = scores_per_img[inds, class_ind]
                if inds.numel() > 0:
                    # if self.class_agnostic:
                    threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                    # else:
                    #     threshed_boxes_2d_per_img = boxes_2d_per_img[
                    #         inds, class_ind * 4:class_ind * 4 + 4]

                    # concat boxes and scores
                    threshed_dets_per_img = torch.cat([
                        threshed_boxes_2d_per_img,
                        threshed_scores_per_img.unsqueeze(-1),
                    ],
                                                      dim=-1)
                    # sort by scores
                    _, order = torch.sort(threshed_scores_per_img, 0, True)
                    threshed_dets_per_img = threshed_dets_per_img[order]

                    # nms
                    keep = nms(threshed_dets_per_img[:, :4],
                               threshed_dets_per_img[:, 4],
                               self.nms).view(-1).long()
                    nms_dets_per_img = threshed_dets_per_img[keep].detach(
                    ).cpu().numpy()

                    dets.append(nms_dets_per_img)
                else:
                    dets.append([])

        duration_time = time.time() - end_time
        # NOTE(review): batch_ind leaks from the loop above, and dets
        # accumulates classes across the whole batch — this save is only
        # correct when batch_size == 1. Confirm the dataloader batch size.
        label_path = self._generate_label_path(image_path[batch_ind])
        self.save_dets(dets, label_path, image_path[batch_ind])
        sys.stdout.write('\r{}/{},duration: {}'.format(
            step + 1, num_samples, duration_time))
        sys.stdout.flush()
        end_time = time.time()
def test_corners_3d(self, dataloader, model, logger):
    """Run corner-based detection over a dataloader and render results.

    Same per-class threshold / sort / NMS flow as test_2d, but the
    predicted corners are carried through the NMS keep-indices and the
    kept boxes + corners are drawn via self.visualizer.
    """
    self.logger.info('Start testing')
    num_samples = len(dataloader)

    if self.feat_vis:
        # enable it before forward pass
        model.enable_feat_vis()

    end_time = 0

    for step, data in enumerate(dataloader):
        # start_time = time.time()
        data = common.to_cuda(data)
        image_path = data[constants.KEY_IMAGE_PATH]

        with torch.no_grad():
            prediction, _, _ = model(data)
        # duration_time = time.time() - start_time

        if self.feat_vis:
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        # initialize dets for each classes
        # dets = [[] for class_ind in range(self.n_classes)]
        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        # dims = prediction[constants.KEY_DIMS]
        corners_2d = prediction[constants.KEY_CORNERS_2D]
        # import ipdb
        # ipdb.set_trace()
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # rcnn_3d = prediction['rcnn_3d']
        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        # Keep only each row's argmax class score; zero out the rest.
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        # if step == 6:
        #     import ipdb
        #     ipdb.set_trace()

        for batch_ind in range(batch_size):
            boxes_2d_per_img = boxes_2d[batch_ind]
            scores_per_img = scores[batch_ind]
            # dims_per_img = dims[batch_ind]
            corners_2d_per_img = corners_2d[batch_ind]
            p2_per_img = p2[batch_ind]

            num_cols = corners_2d.shape[-1]
            # Per-image accumulators; seeded with empty arrays so the
            # np.concatenate below works even when nothing is detected.
            dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
            dets_2d = [np.zeros((0, 4), dtype=np.float32)]
            for class_ind in range(1, self.n_classes):
                # cls thresh
                inds = torch.nonzero(
                    scores_per_img[:, class_ind] > self.thresh).view(-1)
                threshed_scores_per_img = scores_per_img[inds, class_ind]
                if inds.numel() > 0:
                    # if self.class_agnostic:
                    threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                    # threshed_dims_per_img = dims_per_img[inds]
                    threshed_corners_2d_per_img = corners_2d_per_img[inds]
                    # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                    # else:
                    #     threshed_boxes_2d_per_img = boxes_2d_per_img[
                    #         inds, class_ind * 4:class_ind * 4 + 4]

                    # concat boxes and scores
                    threshed_dets_per_img = torch.cat(
                        [
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                            # threshed_dims_per_img,
                        ],
                        dim=-1)

                    # sort by scores
                    _, order = torch.sort(threshed_scores_per_img, 0, True)
                    threshed_dets_per_img = threshed_dets_per_img[order]
                    threshed_corners_2d_per_img = threshed_corners_2d_per_img[
                        order]

                    # nms
                    keep = nms(threshed_dets_per_img[:, :4],
                               threshed_dets_per_img[:, 4],
                               self.nms).view(-1).long()
                    nms_dets_per_img = threshed_dets_per_img[keep].detach(
                    ).cpu().numpy()
                    nms_corners_2d_per_img = threshed_corners_2d_per_img[
                        keep].detach().cpu().numpy()

                    dets.append(nms_corners_2d_per_img)
                    dets_2d.append(nms_dets_per_img[:, :4])
                else:
                    dets.append(
                        np.zeros((0, 8, num_cols), dtype=np.float32))
                    dets_2d.append(np.zeros((0, 4)))

            # import ipdb
            # ipdb.set_trace()
            corners = np.concatenate(dets, axis=0)
            dets_2d = np.concatenate(dets_2d, axis=0)
            # NOTE(review): this rebinds the `corners_2d` prediction tensor
            # read above — harmless only while batch_size == 1.
            corners_2d = None
            corners_3d = None
            # NOTE(review): num_cols == 3 appears to mean 3D (x, y, z)
            # corners, anything else 2D — confirm against the model output.
            if num_cols == 3:
                corners_3d = corners
            else:
                corners_2d = corners
            # NOTE(review): image_path[0] rather than image_path[batch_ind];
            # only correct for batch_size == 1.
            self.visualizer.render_image_corners_2d(
                image_path[0],
                boxes_2d=dets_2d,
                corners_2d=corners_2d,
                corners_3d=corners_3d,
                p2=p2_per_img.cpu().numpy())

        duration_time = time.time() - end_time
        # label_path = self._generate_label_path(image_path[batch_ind])
        # self.save_mono_3d_dets(dets, label_path)
        sys.stdout.write('\r{}/{},duration: {}'.format(
            step + 1, num_samples, duration_time))
        sys.stdout.flush()
        end_time = time.time()
def inference(self, im, p2):
    """Load the configured detector and run single-image 3D inference.

    Args:
        im: shape(N, 3, H, W)
        p2: camera calibration passed to self.preprocess — TODO confirm
            exact shape/convention.

    Returns:
        dets: list whose first element is an empty placeholder, followed by
        one entry per foreground class — either an ndarray of rows
        (xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry) or an empty
        list when the class has no detections.
    """
    config = self.config
    args = self.args
    eval_config = config['eval_config']
    model_config = config['model_config']
    data_config = config['eval_data_config']

    np.random.seed(eval_config['rng_seed'])

    self.logger.info('Using config:')
    pprint.pprint({
        'model_config': model_config,
        'data_config': data_config,
        'eval_config': eval_config
    })

    eval_out = eval_config['eval_out']
    if not os.path.exists(eval_out):
        self.logger.info('creat eval out directory {}'.format(eval_out))
        os.makedirs(eval_out)
    else:
        self.logger.warning('dir {} exist already!'.format(eval_out))

    # restore from random or checkpoint
    restore = True
    # two methods to load model
    # 1. load from any other dirs,it just needs config and model path
    # 2. load from training dir
    if args.model is not None:
        # assert args.model is not None, 'please determine model or checkpoint'
        # it should be a path to model
        checkpoint_name = os.path.basename(args.model)
        input_dir = os.path.dirname(args.model)
    elif args.checkpoint is not None:
        checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
        assert args.load_dir is not None, 'please choose a directory to load checkpoint'
        eval_config['load_dir'] = args.load_dir
        input_dir = os.path.join(eval_config['load_dir'],
                                 model_config['type'], data_config['name'])
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from {}'.
                format(input_dir))
    else:
        restore = False

    # log for restore
    if restore:
        self.logger.info("restore from checkpoint")
    else:
        self.logger.info("use pytorch default initialization")

    # model
    model = detectors.build(model_config)
    model.eval()

    if restore:
        # saver
        saver = Saver(input_dir)
        saver.load({'model': model}, checkpoint_name)
    model = model.cuda()

    # dataloader = dataloaders.make_data_loader(data_config, training=False)

    self.logger.info('Start testing')
    # num_samples = len(dataloader)

    # for step, data in enumerate(dataloader):
    data = self.preprocess(im, p2)
    data = self.to_batch(data)
    data = common.to_cuda(data)
    # image_path = data[constants.KEY_IMAGE_PATH]

    with torch.no_grad():
        prediction = model(data)

    # initialize dets for each classes
    dets = [[]]

    scores = prediction[constants.KEY_CLASSES]
    boxes_2d = prediction[constants.KEY_BOXES_2D]
    dims = prediction[constants.KEY_DIMS]
    orients = prediction[constants.KEY_ORIENTS_V2]
    # NOTE: rebinds the p2 parameter with the calibration from `data`.
    p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

    # rcnn_3d = prediction['rcnn_3d']
    batch_size = scores.shape[0]
    scores = scores.view(-1, self.n_classes)
    # Keep only each row's argmax class score; zero out the rest.
    new_scores = torch.zeros_like(scores)
    _, scores_argmax = scores.max(dim=-1)
    row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
    new_scores[row, scores_argmax] = scores[row, scores_argmax]
    scores = new_scores.view(batch_size, -1, self.n_classes)

    # Single-image inference: only batch element 0 is processed.
    boxes_2d_per_img = boxes_2d[0]
    scores_per_img = scores[0]
    dims_per_img = dims[0]
    orients_per_img = orients[0]
    p2_per_img = p2[0]
    # rcnn_3d_per_img = rcnn_3d[batch_ind]
    # import ipdb
    # ipdb.set_trace()
    for class_ind in range(1, self.n_classes):
        # cls thresh
        inds = torch.nonzero(
            scores_per_img[:, class_ind] > self.thresh).view(-1)
        threshed_scores_per_img = scores_per_img[inds, class_ind]
        if inds.numel() > 0:
            threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
            threshed_dims_per_img = dims_per_img[inds]
            threshed_orients_per_img = orients_per_img[inds]
            # Det row layout after this cat: box(4), score(1), dims(3),
            # orient(1).
            threshed_dets_per_img = torch.cat([
                threshed_boxes_2d_per_img,
                threshed_scores_per_img.unsqueeze(-1),
                threshed_dims_per_img,
                threshed_orients_per_img.unsqueeze(-1)
            ],
                                              dim=-1)

            # sort by scores
            _, order = torch.sort(threshed_scores_per_img, 0, True)
            threshed_dets_per_img = threshed_dets_per_img[order]

            # nms
            keep = nms(threshed_dets_per_img[:, :4],
                       threshed_dets_per_img[:, 4],
                       self.nms).view(-1).long()
            nms_dets_per_img = threshed_dets_per_img[keep].detach().cpu(
            ).numpy()

            # calculate location from dims (cols 5:8), box+score (:5) and
            # orientation (col 8) using the camera matrix.
            location = geometry_utils.calc_location(
                nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

            nms_dets_per_img = np.concatenate(
                [
                    nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                    location, nms_dets_per_img[:, -1:]
                ],
                axis=-1)

            dets.append(nms_dets_per_img)
        else:
            dets.append([])

    # duration_time = time.time() - end_time
    # label_path = self._generate_label_path(image_path[batch_ind])
    # self.save_mono_3d_dets(dets, label_path)
    # sys.stdout.write('\r{}/{},duration: {}'.format(
    #     step + 1, num_samples, duration_time))
    # sys.stdout.flush()
    # end_time = time.time()
    # xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry
    return dets
def evaluation(name, net=None, vis=False, cuda=True, class_agnostic=False):
    """Evaluate a Faster R-CNN model on dataset `name` and return its mAP.

    Args:
        name: imdb/dataset name passed to combined_roidb.
        net: an already-constructed network; when None, a network is built
            from the global `args` and restored from the best checkpoint.
        vis: when True, draw detections (and use a 0.05 score threshold).
        cuda: move data/model to GPU and enable cfg.CUDA.
        class_agnostic: whether bbox regression is class-agnostic.

    Returns:
        Whatever imdb.evaluate_detections returns (the mAP, per the
        surrounding usage).
    """
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(name, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))

    if not net:
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        # input_dir = 'weight'
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)
        # load_name = os.path.join(input_dir,
        #                          'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
        load_name = os.path.join(
            input_dir, 'faster_rcnn_{}_best.pth'.format(cfg['POOLING_MODE']))
        # initilize the network here.
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
            pdb.set_trace()
        fasterRCNN.create_architecture()
        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('load model successfully!')
    else:
        fasterRCNN = net

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    if cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    # vis = args.vis
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] -> ndarray of (x1, y1, x2, y2, score) rows.
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # (0, 5) placeholder stored for classes with no detections.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            # NOTE(review): np.tile on a torch tensor — this branch likely
            # needs boxes.cpu().numpy() (or a torch repeat) to work.
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image scaling (im_info row: h, w, scale).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # NOTE(review): `map` shadows the builtin; kept as-is for compatibility.
    map = imdb.evaluate_detections(all_boxes, output_dir)
    # print(map)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return map
def validate_voc(val_loader, S_RAD, epoch, num_class, num_segments, session,
                 batch_size, cfg, log, dataset, pathway, eval_metrics):
    """Validate S_RAD on a VOC-style action dataset and log per-class AP/mAP.

    Runs detection over val_loader, collects per-class detections and
    ground truth, computes tp/fp with tpfp_default, then prints and logs
    average precision per class and the epoch mAP. When eval_metrics is
    set it also accumulates a confusion matrix and plots PR curves.
    """
    val_iters_per_epoch = int(np.round(len(val_loader)))
    S_RAD.eval()
    # all_boxes[step][image][class] -> detections; bbox[...] -> ground truth.
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    bbox = [[[[] for _ in range(num_class)]
             for _ in range(batch_size * num_segments)]
            for _ in range(val_iters_per_epoch)]

    #limit the number of proposal per image across all the class
    max_per_image = cfg.MAX_DET_IMG

    #confusion matrix
    conf_mat = ConfusionMatrix(num_classes=num_class,
                               CONF_THRESHOLD=0.8,
                               IOU_THRESHOLD=0.2,
                               dataset=dataset)
    num_gt = [0 for _ in range(num_class)]

    #data_iter = iter(val_loader)
    for step, data in enumerate(val_loader):
        #evaluate /inference code
        #start_time = time.time()
        rois, cls_prob, bbox_pred = S_RAD(data)
        #torch.cuda.synchronize()
        #end_time = time.time() - start_time

        # Map dataset name to its class-name -> id dictionary (used for
        # the final per-class report).
        if dataset == 'ucfsport':
            class_dict = act2id
        elif dataset == 'jhmdb':
            class_dict = jhmdbact2id
        elif dataset == 'ucf24':
            class_dict = ucf24act2id
        elif dataset == 'urfall':
            class_dict = fallactivity2id
        elif dataset == 'imfd':
            class_dict = imfallactivity2id

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = bbox_pred.data
        # Un-normalize the regression deltas with the training mean/std.
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)

        #transforms the image to x1,y1,x2,y2, format and clips the coord to images
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])

        if pathway == "two_pathway":
            im_info = data[0][3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[0][1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                        num_class + 4)).to(device="cuda")
        else:
            im_info = data[3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                     num_class + 4)).to(device="cuda")

        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])

        #gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]

        #move the groudtruth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        #count = 0

        for image in range(pred_boxes.shape[0]):
            for class_id in range(1, num_class):
                inds = torch.nonzero(scores[image, :, class_id] > 0).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    #arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat(
                        (cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()

                    #collect groud truth boxes for the image
                    index = np.unique(np.nonzero(gtbb[image])[0])
                    gtbox = gtbb[image][index]
                    label = gtlabels[image][index]

                    #take groundtruth box only if the label =1 for that class
                    bbox[step][image][class_id] = [
                        gtbox[i] for i in range(len(label))
                        if label[i, class_id]
                    ]
                    num_gt[class_id] += np.sum(
                        len(bbox[step][image][class_id]))

                # NOTE(review): nesting reconstructed from collapsed source;
                # the empty-length guard makes per-class placement safe, but
                # confirm it was not intended to run once per image.
                if eval_metrics:
                    if len(bbox[step][image][class_id]) > 0 and len(
                            all_boxes[step][image][class_id]) > 0:
                        conf_mat.process_batch(all_boxes[step][image],
                                               bbox[step][image])

    if eval_metrics:
        result = conf_mat.return_matrix()
        print(result)
        conf_mat.plot(result)

    ap = [None for _ in range(num_class)]
    #calculate fp anf tp for each detections
    for cls_id in range(1, num_class):
        tpfp = []
        class_det = []
        for video in range(len(all_boxes)):
            for batch in range(len(all_boxes[0])):
                tp_fp = (tpfp_default(all_boxes[video][batch][cls_id],\
                                      bbox[video][batch][cls_id],iou_thr=0.5))
                if (len(tp_fp) > 0
                        and len(all_boxes[video][batch][cls_id]) > 0):
                    tpfp.append(tp_fp)
                    class_det.append(all_boxes[video][batch][cls_id])

        assert len(tpfp) == len(class_det)
        tp, fp = tuple(zip(*tpfp))

        # sort all det bboxes by score, also sort tp and fp
        cls_det = np.vstack(class_det)
        num_dets = cls_det.shape[0]  # NOTE(review): computed but unused
        sort_inds = np.argsort(-cls_det[:, -1])
        tp = np.hstack(tp)[:, sort_inds]
        fp = np.hstack(fp)[:, sort_inds]

        # calculate recall and precision with tp and fp
        tp = np.cumsum(tp, axis=1)
        fp = np.cumsum(fp, axis=1)
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gt[cls_id], eps)
        precisions = tp / np.maximum((tp + fp), eps)

        #ROC curve visualisation
        if eval_metrics:
            import matplotlib.pyplot as plt
            # NOTE(review): 'ac' is not a valid matplotlib color; index 0
            # is never used (cls_id starts at 1), so it is effectively a
            # placeholder.
            colors = [
                'ac', 'navy', 'gold', 'turquoise', 'red', 'green', 'black',
                'brown', 'darkorange', 'cornflowerblue', 'teal'
            ]
            plt.plot(recalls[0, :],
                     precisions[0, :],
                     color=colors[cls_id],
                     lw=2,
                     label='class {}'.format(cls_id))
        ap[cls_id] = average_precision(recalls[0, :],
                                       precisions[0, :],
                                       mode='area')

    #Plot ROC Curve
    if eval_metrics:
        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="best")
        plt.show()

    for k, v in class_dict.items():
        #print("Average precision per class:")
        out = ("class [{0}]:{1} |gt:{2}".format(k, ap[v], num_gt[v]))
        print(out)
        log.write(out + '\n')

    mAP = ("mAP for epoch [{0}] is : {1}".format(epoch, mean(ap[1:])))
    print(mAP)
    log.write(mAP + '\n')
    log.flush()
    print("----------------------------------------------")
def validate_virat(val_loader, S_RAD, epoch, num_class, num_segments, vis,
                   session, batch_size, input_data, cfg, log, dataset):
    """Run one validation pass on the VIRAT dataset and report per-class AP.

    For every batch: forwards images through ``S_RAD``, decodes the bbox
    regression deltas, applies per-class score thresholding + NMS, matches
    detections against ground truth via ``match_dt_gt``, then aggregates
    APs over the whole loader with ``aggregate_eval`` and logs class-wise
    AP plus mAP for this ``epoch``.

    Args:
        val_loader: validation DataLoader yielding (im_data, gt_boxes,
            num_boxes, im_info) tuples.
        S_RAD: detection network; called as S_RAD(im_data, im_info,
            gt_boxes, num_boxes) -> (rois, cls_prob, bbox_pred).
        epoch: epoch index, used only for logging.
        num_class: number of classes including background (index 0).
        num_segments: temporal segments per sample; batch_size*num_segments
            images are produced per loader step.
        vis, session, dataset: unused here; kept for a uniform signature.
        batch_size: loader batch size.
        input_data: pre-allocated (im_data, im_info, num_boxes, gt_boxes)
            tensors that are resized/filled in place each step.
        cfg: project config (MAX_DET_IMG, MAX_NUM_GT_BOXES, TRAIN.*, TEST.NMS,
            VIRAT.SCORE_THRES).
        log: open file-like object for text logging.

    Returns:
        None; results are printed and written to ``log``.
    """
    val_iters_per_epoch = int(np.round(len(val_loader)))
    im_data, im_info, num_boxes, gt_boxes = input_data
    S_RAD.eval()
    # all_boxes[step][image][class_id] -> ndarray of (x1,y1,x2,y2,score) rows
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    # limit the number of proposals per image across all the classes
    max_per_image = cfg.MAX_DET_IMG
    # dict with matched detections and its score @class_idx
    eval_target = {one: 1 for one in activity2id_person}
    e = {one: {} for one in eval_target}  # cat_id -> imgid -> {"dm","dscores"}
    # unique image id across the whole validation run
    imgid = 0
    num_gt = [0 for _ in range(num_class)]
    for step, data in enumerate(val_loader):
        # copy the batch into the pre-allocated buffers in place
        im_data.resize_(data[0].size()).copy_(data[0])
        gt_boxes.resize_(data[1].size()).copy_(data[1])
        num_boxes.resize_(data[2].size()).copy_(data[2])
        im_info.resize_(data[3].size()).copy_(data[3])
        # flatten the segment dimension into the batch dimension
        im_data = im_data.view(-1, im_data.size(2), im_data.size(3),
                               im_data.size(4))
        im_info = im_info.view(-1, 3)
        gt_boxes = gt_boxes.view(-1, cfg.MAX_NUM_GT_BOXES, num_class + 4)
        num_boxes = num_boxes.view(-1)
        # evaluate / inference code
        start = time.time()
        rois, cls_prob, bbox_pred = S_RAD(im_data, im_info, gt_boxes,
                                          num_boxes)
        torch.cuda.synchronize()
        end_time = time.time() - start
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # batch_size = rois.shape[0]
        # un-normalize the regression deltas with the training-time stats
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)
        # transforms the boxes to x1,y1,x2,y2 format and clips the coords to the image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])
        # gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]
        # pred_boxes /= data[3][0][1][2].item()
        # gtbb /= data[3][0][1][2].item()
        # move the ground truth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        # count = 0
        for image in range(pred_boxes.shape[0]):
            # per-image ground-truth boxes, indexed by class
            box = [None for _ in range(num_class)]
            imgid += 1
            for class_id in range(1, num_class):  # skip background (0)
                inds = torch.nonzero(
                    scores[image, :, class_id] > cfg.VIRAT.SCORE_THRES).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    # arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()
                # collect ground truth boxes for the image
                # rows of gtbb that are not all-zero padding
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]
                # take a ground-truth box only if the label == 1 for that class
                box[class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += np.sum(len(box[class_id]))
            # accumulate detection/gt matches for this image into `e`
            match_dt_gt(e, imgid, all_boxes[step][image], box,
                        activity2id_person)
        if (step + 1) % 50 == 0:
            output = ('Test: [{0}/{1}]\t'.format(step, (val_iters_per_epoch)))
            print(output)
    # aggregate matches into per-class APs and report
    aps = aggregate_eval(e, maxDet=max_per_image)
    mAP = (mean(aps[target] for target in aps.keys()))
    for k, v in aps.items():
        output = ('class: [{0}] - {1}'.format(k, v))
        log.write(output + '\n')
        print(output)
    mAPout = ('mAP at epoch {0}: {1}'.format(epoch, mAP))
    print('mAP at epoch {0}: {1} \n'.format(epoch, mAP))
    log.write(mAPout + '\n')
    log.flush()
def postprocess(self, instance, im_info):
    # TODO create a new Function
    """Clip, sort, and NMS-filter RPN proposals for a whole batch.

    Reads proposals and objectness scores out of ``instance``, clips the
    boxes to the image, keeps the top ``pre_nms_topN`` by foreground
    probability, applies NMS, keeps ``post_nms_topN`` survivors, and
    writes a zero-padded, fixed-size proposal tensor back into
    ``instance``.

    Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)
    Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
        fg_probs_batch: FloatTensor, shape(N,post_nms_topN)
        (NOTE(review): fg_probs_batch is described here but its assignment
        is commented out below — only the proposals are written back.)
    """
    proposals = instance[constants.KEY_BOXES_2D]
    rpn_cls_probs = instance[constants.KEY_OBJECTNESS]
    batch_size = rpn_cls_probs.shape[0]
    # filter and clip proposals to the image boundaries
    proposals = box_ops.clip_boxes(proposals, im_info)

    # fg prob: channel 1 of the (bg, fg) pair
    fg_probs = rpn_cls_probs[..., 1]

    # sort fg probabilities descending, per batch element
    _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

    # pre-allocated outputs; slots beyond the kept proposals stay 0 / -1
    proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                  4).type_as(proposals)
    proposals_order = torch.zeros(
        batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

    for i in range(batch_size):
        proposals_single = proposals[i]
        fg_probs_single = fg_probs[i]
        fg_order_single = fg_probs_order[i]
        # pre nms: keep only the highest-scoring pre_nms_topN candidates
        if self.pre_nms_topN > 0:
            fg_order_single = fg_order_single[:self.pre_nms_topN]
        proposals_single = proposals_single[fg_order_single]
        fg_probs_single = fg_probs_single[fg_order_single]

        # nms
        keep_idx_i = nms(proposals_single, fg_probs_single, self.nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # post nms: cap the number of NMS survivors
        if self.post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:self.post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        fg_probs_single = fg_probs_single[keep_idx_i]
        fg_order_single = fg_order_single[keep_idx_i]

        # padding 0 at the end.
        num_proposal = keep_idx_i.numel()
        proposals_batch[i, :num_proposal, :] = proposals_single
        # fg_probs_batch[i, :num_proposal] = fg_probs_single
        proposals_order[i, :num_proposal] = fg_order_single
    instance[constants.KEY_BOXES_2D] = proposals_batch

    # TODO(assign rpn_cls_probs)
    return instance
def mono_test_keypoint(eval_config, data_loader, model):
    """Run monocular 3D detection with keypoints and dump KITTI-format results.

    Only one image in batch is supported.

    For each image: runs ``im_detect``, thresholds + NMS-filters the 2D
    detections per class, post-processes the 3D regression output with
    ``mono_3d_postprocess_bbox``, and writes detections and keypoints to
    ``eval_config['eval_out']`` via ``save_dets`` / ``save_keypoints``.

    Args:
        eval_config: dict with at least 'classes', 'thresh', 'nms',
            'class_agnostic', 'eval_out'.
        data_loader: yields dicts with 'img_name', 'img_orig', calibration
            'p2', and assorted ground-truth entries used by the debug path.
        model: detection network passed straight to ``im_detect``.
    """
    num_samples = len(data_loader)
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors, rcnn_3d, keypoints = im_detect(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        duration_time = time.time() - start_time
        # import ipdb
        # ipdb.set_trace()
        # drop the singleton batch dimension (one image per batch)
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()
        rcnn_3d = rcnn_3d.squeeze()
        keypoints = keypoints.squeeze()
        # anchors = anchors.squeeze()
        classes = eval_config['classes']
        thresh = eval_config['thresh']
        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []
        keypoint_dets = []
        # import ipdb
        # ipdb.set_trace()
        # nms, per foreground class
        # NOTE(review): this iterates range(1, len(classes)) while mono_test
        # and test_2d use range(1, len(classes) + 1) — confirm which indexing
        # of `scores` columns is intended.
        for j in range(1, len(classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                # NOTE(review): cls_boxes/rois_boxes/anchors_boxes are only
                # bound when class_agnostic is True; a False setting would
                # raise NameError below — confirm this path is never used
                # with class-specific boxes.
                if eval_config['class_agnostic']:
                    cls_boxes = pred_boxes[inds, :]
                    rois_boxes = rois[inds, :]
                    anchors_boxes = anchors[inds, :]
                    rcnn_3d = rcnn_3d[inds]
                    keypoints = keypoints[inds]
                # append the score as a 5th column: (x1,y1,x2,y2,score)
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)),
                                      1)
                anchors_dets = torch.cat(
                    (anchors_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                rois_dets = rois_dets[order]
                anchors_dets = anchors_dets[order]
                rcnn_3d = rcnn_3d[order]
                keypoints = keypoints[order]
                # NOTE(review): this nms takes (dets, thresh) while other
                # functions in this file call nms(boxes, scores, thresh) —
                # verify which nms implementation is imported here.
                keep = nms(cls_dets, eval_config['nms'])
                cls_dets = cls_dets[keep.view(-1).long()]
                rois_dets = rois_dets[keep.view(-1).long()]
                anchors = anchors_dets[keep.view(-1).long()]
                rcnn_3d = rcnn_3d[keep.view(-1).long()]
                keypoints = keypoints[keep.view(-1).long()]
                cls_dets = cls_dets.detach().cpu().numpy()
                res_rois.append(rois_dets.detach().cpu().numpy())
                res_anchors.append(anchors.detach().cpu().numpy())
                # pull ground truth / calibration to numpy for post-processing
                coords = data['coords'][0].detach().cpu().numpy()
                gt_boxes = data['gt_boxes'][0].detach().cpu().numpy()
                gt_boxes_3d = data['gt_boxes_3d'][0].detach().cpu().numpy()
                points_3d = data['points_3d'][0].detach().cpu().numpy()
                local_angles_gt = data['local_angle'][0].detach().cpu().numpy()
                local_angle_oritation_gt = data['local_angle_oritation'][
                    0].detach().cpu().numpy()
                encoded_side_points = data['encoded_side_points'][0].detach(
                ).cpu().numpy()
                points_3d = points_3d.T
                p2 = data['p2'][0].detach().cpu().numpy()
                rcnn_3d = rcnn_3d.detach().cpu().numpy()
                keypoints = keypoints.detach().cpu().numpy()
                # rcnn_3d_gt = rcnn_3d_gt.detach().cpu().numpy()
                # use gt: debug path that feeds ground truth through the
                # 3D post-processing instead of the network output
                use_gt = False
                if use_gt:
                    keypoints_gt = data['keypoint_gt'][0].detach().cpu().numpy(
                    )
                    # import ipdb
                    # ipdb.set_trace()
                    # decode the encoded side points back to pixel coords
                    center_x = (gt_boxes[:, 0] + gt_boxes[:, 2]) / 2
                    center_y = (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2
                    gt_boxes_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
                    gt_boxes_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
                    center = np.stack([center_x, center_y], axis=-1)
                    gt_boxes_dims = np.stack([gt_boxes_w, gt_boxes_h],
                                             axis=-1)
                    point1 = encoded_side_points[:, :2] * gt_boxes_dims + center
                    point2 = encoded_side_points[:, 2:] * gt_boxes_dims + center
                    global_angles_gt = gt_boxes_3d[:, -1:]
                    rcnn_3d_gt = np.concatenate(
                        [gt_boxes_3d[:, :3], point1, point2], axis=-1)
                    # just for debug
                    if len(rcnn_3d_gt):
                        cls_dets_gt = np.concatenate(
                            [gt_boxes, np.zeros_like(gt_boxes[:, -1:])],
                            axis=-1)
                        rcnn_3d_gt, _ = mono_3d_postprocess_bbox(
                            rcnn_3d_gt, cls_dets_gt, p2)
                        dets.append(
                            np.concatenate([cls_dets_gt, rcnn_3d_gt],
                                           axis=-1))
                        keypoint_dets.append(keypoints_gt)
                    else:
                        dets.append([])
                        res_rois.append([])
                        res_anchors.append([])
                        dets_3d.append([])
                        keypoint_dets.append([])
                else:
                    # import ipdb
                    # ipdb.set_trace()
                    # sample_name = os.path.splitext(os.path.basename(data['img_name'][0]))[0]
                    # if sample_name=='000031':
                    # import ipdb
                    # ipdb.set_trace()
                    # rcnn_3d[:, :-1] = gt_boxes_3d[:, :3]
                    # global_angles_gt = gt_boxes_3d[:, -1:]
                    # rcnn_3d = np.concatenate(
                    # [gt_boxes_3d[:, :3], global_angles_gt], axis=-1)
                    # rcnn_3d[:,3] = 1-rcnn_3d[:,3]
                    # lift 2D detections + regressed params to 3D boxes
                    rcnn_3d, location = mono_3d_postprocess_bbox(
                        rcnn_3d, cls_dets, p2)
                    # rcnn_3d = mono_3d_postprocess_angle(rcnn_3d, cls_dets, p2)
                    # rcnn_3d = mono_3d_postprocess_depth(rcnn_3d, cls_dets, p2)
                    # rcnn_3d[:, 3:6] = location
                    # rcnn_3d = np.zeros((cls_dets.shape[0], 7))
                    dets.append(np.concatenate([cls_dets, rcnn_3d], axis=-1))
                    keypoints = keypoints.reshape((keypoints.shape[0], -1))
                    keypoint_dets.append(keypoints)
            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])
                keypoint_dets.append([])

        # import ipdb
        # ipdb.set_trace()
        save_dets(dets, img_file[0], 'kitti', eval_config['eval_out'])
        save_keypoints(keypoint_dets[0], img_file[0])
        # save_dets(res_rois[0], img_file[0], 'kitti',
        # eval_config['eval_out_rois'])
        # save_dets(res_anchors[0], img_file[0], 'kitti',
        # eval_config['eval_out_anchors'])
        sys.stdout.write('\r{}/{},duration: {}'.format(i + 1, num_samples,
                                                       duration_time))
        sys.stdout.flush()
def mono_test(eval_config, data_loader, model):
    """Run monocular 3D detection and dump KITTI-format results.

    Only one image in batch is supported.

    Per image: ``im_detect`` produces 2D boxes, scores, and a 3D regression
    head output; detections are score-thresholded and NMS-filtered per
    class, concatenated with their 3D parameters, and written out via
    ``save_dets``. Timing (data/detect/post) is reported on stdout.

    Args:
        eval_config: dict with at least 'classes', 'thresh', 'nms',
            'eval_out'.
        data_loader: yields dicts with image, calibration 'orig_p2', and
            ground-truth entries used only by the debug (use_gt) path.
        model: detection network passed straight to ``im_detect``.
    """
    num_samples = len(data_loader)
    end_time = 0
    for i, data in enumerate(data_loader):
        data_time = time.time() - end_time
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors, rcnn_3d = im_detect(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        det_time = time.time() - start_time
        # import ipdb
        # ipdb.set_trace()
        # drop the singleton batch dimension (one image per batch)
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()
        rcnn_3d = rcnn_3d.squeeze()
        # anchors = anchors.squeeze()
        classes = eval_config['classes']
        thresh = eval_config['thresh']
        # print(thresh)
        # thresh = 0.3
        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []
        # import ipdb
        # ipdb.set_trace()
        # nms, per foreground class
        # new_scores = torch.zeros_like(scores)
        # _, scores_argmax = scores.max(dim=-1)
        # row = torch.arange(0, scores.shape[0]).type_as(scores_argmax)
        # new_scores[row, scores_argmax] = scores[row, scores_argmax]
        # NOTE(review): range(1, len(classes) + 1) here vs
        # range(1, len(classes)) in mono_test_keypoint — confirm how many
        # score columns `scores` actually has.
        for j in range(1, len(classes) + 1):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            post_start_time = time.time()
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_boxes = pred_boxes[inds, :]
                # rois_boxes = rois[inds, :]
                # anchors_boxes = anchors[inds, :]
                # if not eval_config['class_agnostic_3d']:
                # rcnn_3d_dets = torch.cat(
                # [rcnn_3d[inds, j * 3:j * 3 + 3], rcnn_3d[inds, -4:]],
                # dim=-1)
                # else:
                rcnn_3d_dets = rcnn_3d[inds]
                # append score as a 5th column: (x1,y1,x2,y2,score)
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)), 1)
                # anchors_dets = torch.cat(
                # (anchors_boxes, cls_scores.unsqueeze(1)), 1)

                # sort descending by score before NMS
                _, order = torch.sort(cls_scores, 0, True)
                cls_dets = cls_dets[order]
                # rois_dets = rois_dets[order]
                # anchors_dets = anchors_dets[order]
                rcnn_3d_dets = rcnn_3d_dets[order]

                keep = nms(cls_dets[:, :4], cls_dets[:, -1],
                           eval_config['nms'])
                cls_dets = cls_dets[keep.view(-1).long()]
                # rois_dets = rois_dets[keep.view(-1).long()]
                # anchors = anchors_dets[keep.view(-1).long()]
                rcnn_3d_dets = rcnn_3d_dets[keep.view(-1).long()]

                cls_dets = cls_dets.detach().cpu().numpy()
                # res_rois.append(rois_dets.detach().cpu().numpy())
                # res_anchors.append(anchors.detach().cpu().numpy())
                # pull ground truth / calibration to numpy
                coords = data['coords'][0].detach().cpu().numpy()
                gt_boxes = data['gt_boxes'][0].detach().cpu().numpy()
                gt_boxes_2d_proj = data['gt_boxes_proj'][0].detach().cpu(
                ).numpy()
                gt_boxes_3d = data['gt_boxes_3d'][0].detach().cpu().numpy()
                points_3d = data['points_3d'][0].detach().cpu().numpy()
                local_angles_gt = data['local_angle'][0].detach().cpu().numpy()
                local_angle_oritation_gt = data['local_angle_oritation'][
                    0].detach().cpu().numpy()
                encoded_side_points = data['encoded_side_points'][0].detach(
                ).cpu().numpy()
                points_3d = points_3d.T
                p2 = data['orig_p2'][0].detach().cpu().numpy()
                rcnn_3d_dets = rcnn_3d_dets.detach().cpu().numpy()
                cls_orient_gt = data['cls_orient'][0].detach().cpu().numpy()
                reg_orient_gt = data['reg_orient'][0].detach().cpu().numpy()
                # rcnn_3d_gt = rcnn_3d_gt.detach().cpu().numpy()

                # use gt: debug path feeding ground truth through the
                # 3D post-processing instead of network output
                use_gt = False
                post_time = 0
                if use_gt:
                    # import ipdb
                    # ipdb.set_trace()
                    # center_x = (gt_boxes[:, 0] + gt_boxes[:, 2]) / 2
                    # center_y = (gt_boxes[:, 1] + gt_boxes[:, 3]) / 2
                    # gt_boxes_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
                    # gt_boxes_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
                    # center = np.stack([center_x, center_y], axis=-1)
                    # gt_boxes_dims = np.stack([gt_boxes_w, gt_boxes_h], axis=-1)
                    # point1 = encoded_side_points[:, :2] * gt_boxes_dims + center
                    # point2 = encoded_side_points[:, 2:] * gt_boxes_dims + center
                    # global_angles_gt = gt_boxes_3d[:, -1:]
                    rcnn_3d_gt = np.concatenate([
                        gt_boxes_3d[:, :3], cls_orient_gt[..., np.newaxis],
                        reg_orient_gt
                    ],
                                                axis=-1)
                    # just for debug
                    if len(gt_boxes):
                        cls_dets_gt = np.concatenate(
                            [gt_boxes, np.zeros_like(gt_boxes[:, -1:])],
                            axis=-1)
                        cls_dets_2d_proj_gt = np.concatenate([
                            gt_boxes_2d_proj,
                            np.zeros_like(gt_boxes[:, -1:])
                        ],
                                                             axis=-1)
                        rcnn_3d_gt, _ = mono_3d_postprocess_bbox(
                            rcnn_3d_gt, cls_dets_2d_proj_gt, p2)
                        dets.append(
                            np.concatenate([cls_dets_2d_proj_gt, rcnn_3d_gt],
                                           axis=-1))
                    else:
                        dets.append([])
                        res_rois.append([])
                        res_anchors.append([])
                        dets_3d.append([])
                else:
                    # import ipdb
                    # ipdb.set_trace()
                    # sample_name = os.path.splitext(os.path.basename(data['img_name'][0]))[0]
                    # if sample_name=='000031':
                    # import ipdb
                    # ipdb.set_trace()
                    # rcnn_3d[:, :-1] = gt_boxes_3d[:, :3]
                    # global_angles_gt = gt_boxes_3d[:, -1:]
                    # rcnn_3d = np.concatenate(
                    # [gt_boxes_3d[:, :3], global_angles_gt], axis=-1)
                    # rcnn_3d[:,3] = 1-rcnn_3d[:,3]
                    # rcnn_3d_dets, location = mono_3d_postprocess_bbox(
                    # rcnn_3d_dets, cls_dets, p2)
                    post_time = time.time() - post_start_time
                    # rcnn_3d = mono_3d_postprocess_angle(rcnn_3d, cls_dets, p2)
                    # rcnn_3d = mono_3d_postprocess_depth(rcnn_3d, cls_dets, p2)
                    # rcnn_3d[:, 3:6] = location
                    # rcnn_3d = np.zeros((cls_dets.shape[0], 7))
                    dets.append(
                        np.concatenate([cls_dets, rcnn_3d_dets], axis=-1))
            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])
                post_time = 0
        duration_time = time.time() - end_time
        # import ipdb
        # ipdb.set_trace()
        save_dets(dets,
                  img_file[0],
                  'kitti',
                  eval_config['eval_out'],
                  classes_name=eval_config['classes'])
        # save_dets(res_rois[0], img_file[0], 'kitti',
        # eval_config['eval_out_rois'])
        # save_dets(res_anchors[0], img_file[0], 'kitti',
        # eval_config['eval_out_anchors'])
        sys.stdout.write(
            '\r{}/{},duration: {}, det_time: {}, post_time: {}, data_time: {}'.
            format(i + 1, num_samples, duration_time, det_time, post_time,
                   data_time))
        sys.stdout.flush()
        end_time = time.time()
def test_2d(eval_config, data_loader, model):
    """Run 2D-only detection and dump KITTI-format results.

    Only one image in batch is supported.

    Same per-class threshold + sort + NMS pipeline as ``mono_test``, but
    uses ``im_detect_2d`` and pads each detection with a zero 7-column 3D
    block so the KITTI writer (``save_dets``) sees a uniform layout.

    Args:
        eval_config: dict with at least 'classes', 'thresh', 'nms',
            'class_agnostic', 'eval_out'.
        data_loader: yields dicts with 'img_name' and 'img_orig'.
        model: detection network passed straight to ``im_detect_2d``.
    """
    num_samples = len(data_loader)
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()
        pred_boxes, scores, rois, anchors = im_detect_2d(
            model, to_cuda(data), eval_config, im_orig=data['img_orig'])
        duration_time = time.time() - start_time
        # import ipdb
        # ipdb.set_trace()
        # drop the singleton batch dimension (one image per batch)
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        rois = rois.squeeze()
        classes = eval_config['classes']
        thresh = eval_config['thresh']
        # thresh = 0.1
        # import ipdb
        # ipdb.set_trace()
        dets = []
        res_rois = []
        res_anchors = []
        dets_3d = []
        # foreground classes plus background column
        n_classes = (len(classes) + 1)
        # nms
        # import ipdb
        # ipdb.set_trace()
        for j in range(1, len(classes) + 1):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[inds, j]
                if not eval_config['class_agnostic']:
                    # boxes are laid out per class: take columns j*4..j*4+3
                    pred_boxes_per_class = pred_boxes.contiguous().view(
                        -1, 4 * n_classes)[:, j * 4:(j + 1) * 4]
                    cls_boxes = pred_boxes_per_class[inds, :]
                else:
                    cls_boxes = pred_boxes[inds, :]
                # rois_boxes = rois[inds, :]
                # anchors_boxes = anchors[inds, :]
                # append score as a 5th column: (x1,y1,x2,y2,score)
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # rois_dets = torch.cat((rois_boxes, cls_scores.unsqueeze(1)), 1)
                # anchors_dets = torch.cat(
                # (anchors_boxes, cls_scores.unsqueeze(1)), 1)

                # sort descending by score before NMS
                _, order = torch.sort(cls_scores, 0, True)
                cls_dets = cls_dets[order]
                # rois_dets = rois_dets[order]
                # anchors_dets = anchors_dets[order]

                # NOTE(review): nms is called here as (dets, thresh) while
                # other functions in this file use nms(boxes, scores, thresh)
                # — verify which nms implementation is imported.
                keep = nms(cls_dets, eval_config['nms'])
                cls_dets = cls_dets[keep.view(-1).long()]
                # rois_dets = rois_dets[keep.view(-1).long()]
                # anchors = anchors_dets[keep.view(-1).long()]

                # res_rois.append(rois_dets.detach().cpu().numpy())
                # res_anchors.append(anchors.detach().cpu().numpy())

                # pad with an all-zero 3D block so save_dets sees the same
                # column layout as the 3D pipelines
                # NOTE(review): cls_dets is still a torch tensor here (never
                # moved to .cpu().numpy()) — np.concatenate relies on it
                # being on CPU; confirm.
                rcnn_3d = np.zeros((cls_dets.shape[0], 7))
                dets.append(np.concatenate([cls_dets, rcnn_3d], axis=-1))
            else:
                dets.append([])
                res_rois.append([])
                res_anchors.append([])
                dets_3d.append([])

        # import ipdb
        # ipdb.set_trace()
        save_dets(dets,
                  img_file[0],
                  'kitti',
                  eval_config['eval_out'],
                  classes_name=eval_config['classes'])
        sys.stdout.write('\r{}/{},duration: {}'.format(i + 1, num_samples,
                                                       duration_time))
        sys.stdout.flush()
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    """Full Faster R-CNN forward pass with information-bottleneck prior losses.

    Runs backbone -> RPN -> RoI pooling -> classification/regression heads,
    sampling the bbox regression output from a learned Gaussian
    (mean + log-variance) instead of using it directly. During training it
    additionally computes two entropy-style "prior" losses over the
    foreground score distributions (one at the RPN, one at the head) and an
    L2 regularizer on the pooled features.

    Args:
        im_data: image batch tensor, shape (B, C, H, W).
        im_info: per-image (height, width, scale) info tensor.
        gt_boxes: padded ground-truth boxes; last column holds the class id.
        num_boxes: number of valid gt boxes per image.

    Returns:
        Tuple of (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
        RCNN_loss_cls, RCNN_loss_bbox, rois_label, rpn_prior_loss,
        rpn_reg_loss, head_prior_loss, head_reg_loss). Loss entries are
        zero tensors when not training.
    """
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox, fg_scores, rpn_reg_loss = \
        self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    rpn_prior_loss = torch.FloatTensor([0.]).cuda()
    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        if self.rpn_prior_weight != 0.:
            # entropy of the normalized fg-score distribution, scaled by
            # the number of gt boxes in the image
            for i in range(batch_size):
                gt_num = num_boxes[i].detach().cpu().item()
                # FIX: skip images with no gt boxes — the original divided
                # by float(gt_num) == 0.0, producing an inf loss
                if gt_num <= 0:
                    continue
                score = fg_scores[i]
                score_sum = score.sum().detach().cpu().item()
                score = score / score_sum
                log_score = score * torch.log(score + 1e-6)  # p * log(p)
                rpn_prior_loss += (-1. * log_score.sum() / float(gt_num))
            rpn_prior_loss /= batch_size
            rpn_prior_loss *= self.rpn_prior_weight
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = torch.FloatTensor([0.]).cuda()
        rpn_loss_bbox = torch.FloatTensor([0.]).cuda()

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    head_reg_loss = torch.FloatTensor([0.]).cuda()
    if self.training and self.head_reg_weight != 0.:
        # L2 regularizer on the pooled features
        head_reg_loss = (pooled_feat**2).mean() * self.head_reg_weight

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)

    # sample loc data: bbox head predicts (log sigma^2, mu); draw the
    # regression output via the reparameterization trick
    normal_dist = torch.randn(bbox_pred.size(0), 4).float().cuda()
    log_sigma_2 = bbox_pred[:, :4]
    miu = bbox_pred[:, 4:]
    sigma = torch.exp(log_sigma_2 / 2.)
    sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
    bbox_pred = sample_loc_data

    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                        int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1,
                            1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = torch.FloatTensor([0.]).cuda()
    RCNN_loss_bbox = torch.FloatTensor([0.]).cuda()
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    head_prior_loss = torch.FloatTensor([0.]).cuda()
    if self.training and self.head_prior_weight != 0.:
        scores = cls_prob.data  # [batch, num_rois, classes]
        scores_gradient = cls_prob  # [batch, num_rois, classes]
        boxes = rois.data[:, :, 1:5]  # [batch, num_rois, 4]

        if cfg.TRAIN.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data  # [batch, num_rois, 4]
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1,
                                                 4 * len(self.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
        else:
            # Simply repeat the boxes, once for each class
            print("no use bbox head in IB")
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # scale back to original image coordinates
        pred_boxes /= im_info[:, 2].data[:, None, None]  # [batch, num_rois, 4]

        loss_count = 0.
        gt_classes = gt_boxes[:, :, -1].data  # [batch, num(0 pad to 20)]
        for i in range(batch_size):
            for j in range(1, len(self.classes)):  # skip background class
                if not (gt_classes[i] == j).any():
                    # no such class in gt
                    continue
                # there are gt for this class
                inds = torch.nonzero(
                    scores[i, :, j] > self.nms_threshold).view(-1)
                if inds.numel() == 0:
                    continue
                cls_scores = scores[i, :, j][inds]  # [num]
                cls_scores_gradient = scores_gradient[i, :, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[i, inds, :]  # [num, 4]
                else:
                    cls_boxes = pred_boxes[i, inds][:, j * 4:(j + 1) * 4]
                cls_scores_gradient = cls_scores_gradient[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                score = cls_scores_gradient[keep.view(-1).long()]  # [num_keep]
                gt_num = (gt_classes[i] == j).sum().detach().cpu().item()
                if score.size(0) <= gt_num:
                    continue
                score_sum = score.sum().detach().cpu().item()
                score = score / score_sum
                log_score = score * torch.log(score + 1e-6)
                head_prior_loss += (-1. * log_score.sum() / float(gt_num))
                loss_count += 1.
        # FIX: the original unconditionally divided by loss_count; when no
        # class contributed (loss_count == 0) that produced a NaN loss that
        # poisoned the whole training step. Keep the loss at 0 instead.
        if loss_count > 0:
            head_prior_loss /= loss_count
            head_prior_loss *= self.head_prior_weight

    return rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
        rpn_prior_loss, rpn_reg_loss, head_prior_loss, head_reg_loss