def _rois_bbox(self, rois, bbox, num_classes, im_info):
    """Decode per-class bbox regression deltas against the input RoIs.

    Tiles each RoI once per class, un-normalizes the predicted deltas with
    the configured means/stds, applies them via bbox_transform_inv, clips
    the result to the image, and re-packs everything into one row per RoI.

    Args:
        rois: RoI tensor; each row is presumably (batch_idx, x1, y1, x2, y2)
            — the code splits it [1, 4] below. TODO confirm against caller.
        bbox: predicted regression deltas, 4 values per class per RoI.
        num_classes: number of classes (controls the repeat factor).
        im_info: image size info passed straight to clip_boxes.

    Returns:
        Tensor of shape (N, 5 * num_classes): per class, the leading label
        column followed by the decoded, clipped box.

    NOTE(review): `bbox *= stds; bbox += means` mutates the caller's tensor
    in place — verify callers do not reuse `bbox` afterwards.
    """
    # Tile each RoI row so there is one (label, box) pair per class.
    rois = rois.repeat(1, num_classes)
    # Un-normalize the deltas (training normalized them with these stats).
    stds = torch.tensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
    stds = stds.repeat(1, num_classes)
    means = torch.tensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
    means = means.repeat(1, num_classes)
    bbox *= stds
    bbox += means
    # Flatten to one 4-vector / one 5-vector per (RoI, class) pair.
    bbox = bbox.view(-1, 4)
    rois = rois.view(-1, 5)
    # Column 0 is the label/batch index; columns 1:5 are the box.
    rois_label, anchors = torch.split(rois, [1, 4], 1)
    # Decode deltas relative to the boxes, then clip to the image bounds
    # (batch size of 1 — unsqueeze adds the expected batch dim).
    anchors = bbox_transform_inv(anchors.unsqueeze(0), bbox.unsqueeze(0), 1)
    anchors = clip_boxes(anchors, im_info, 1)
    # Re-attach the label column and fold back to one row per original RoI.
    rois = torch.cat([rois_label.view(-1, 1), anchors.squeeze(0)], 1).view(-1, 5 * num_classes)
    return rois
def loop():
    """Evaluate a saved FPN checkpoint over the chosen dataset, repeatedly.

    Parses CLI args, builds the network, then for up to 200 successive
    checkpoints (``args.checkepoch`` is incremented each pass) runs the full
    test set, applies bbox regression + NMS, optionally visualizes, dumps
    ``detections.pkl`` and appends per-class APs to ``result.txt``.

    Side effects: reads checkpoints from ``args.load_dir``, writes images to
    ``images/``, ``detections.pkl`` and ``result.txt``; mutates global ``cfg``.
    """
    args = parse_args()
    print('Called with args:')
    print(args)
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    # Dataset-specific imdb names and anchor settings.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    args.cfg_file = "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    # No flipping at test time.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))
    # initialize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fpn.create_architecture()
    print('load model successfully!')
    # Placeholder tensors; resized/copied from each batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        fpn.cuda()
    start = time.time()
    max_per_image = 100
    vis = True  # args.vis
    # NOTE(review): both branches set 0.0 — the vis/no-vis threshold split is
    # currently a no-op; kept as-is to preserve behavior.
    if vis:
        thresh = 0.0
    else:
        thresh = 0.0
    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    output_dir = get_output_dir(imdb, save_name)
    # Evaluate up to 200 consecutive checkpoints (checkepoch advances below).
    for h in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)
        data_iter = iter(dataloader)
        _t = {'im_detect': time.time(), 'misc': time.time()}
        det_file = os.path.join(output_dir, 'detections.pkl')
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        fpn.eval()
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
        for i in range(num_images):
            # FIX: Python-3 iterator protocol (was Python-2-only data_iter.next()).
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
                _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data  # presumably 1 x num_rois x num_classes
            boxes = rois.data[:, :, 1:5]  # drop batch-index column; keep x1,y1,x2,y2
            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas.
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Un-normalize targets by the precomputed mean and stdev.
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # No regression: use the RoIs directly.
                pred_boxes = boxes
            # Undo the test-time image scaling (data[1][0][2] is the scale factor).
            pred_boxes /= data[1][0][2].cuda()
            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)
            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    # FIX: was `~args.cuda` — bitwise NOT of a bool gives -2/-1,
                    # both truthy, so the force-CPU flag was effectively always
                    # set. `not args.cuda` expresses the intended logic.
                    keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j],
                                                 cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array
            # Limit to max_per_image detections *over all classes*.
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
            misc_toc = time.time()
            nms_time = misc_toc - misc_tic
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()
            if vis:
                cv2.imwrite('images/result%d_%d.png' % (args.checkepoch, i), im2show)
            # Free per-image tensors before the next iteration.
            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()
        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)
        # Append "class:AP" pairs plus the checkpoint tag to result.txt.
        with open("result.txt", 'a+') as f:
            lp = ""
            cc = 0
            for b in clss:
                if cc != len(clss) - 1:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "',"
                else:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "'"
                cc = cc + 1
            sp = "[" + lp + "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
            f.write(sp + "\n")
        end = time.time()
        print("test time: %0.4fs" % (end - start))
        # Move on to the next checkpoint epoch.
        args.checkepoch = args.checkepoch + 1
        del data_iter
        del dataset
        del dataloader
        torch.cuda.empty_cache()
        gc.collect()
# NOTE(review): orphan fragment — this span is not inside any function and
# references names (bbox_pred, boxes, args, imdb, data, thresh, ...) that are
# not defined at module level here. It appears to duplicate the bbox-regression
# post-processing inside loop(); confirm whether it is dead paste residue that
# should be deleted.
if cfg.TEST.BBOX_REG:
    # Apply bounding-box regression deltas.
    box_deltas = bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally un-normalize targets by a precomputed mean and stdev.
        if args.class_agnostic:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
else:
    # Simply repeat the boxes, once for each class.
    pred_boxes = np.tile(boxes, (1, scores.shape[1]))
# Undo the test-time image scale (presumably data[1][0][2] is the scale — verify).
pred_boxes /= data[1][0][2].item()
scores = scores.squeeze()
pred_boxes = pred_boxes.squeeze()
det_toc = time.time()
detect_time = det_toc - det_tic
misc_tic = time.time()
for j in range(1, imdb.num_classes):
    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
def forward(self, input):
    """RPN proposal layer: turn per-anchor scores + deltas into top RoIs.

    Algorithm:
      for each (H, W) location i:
        generate A anchor boxes centered on cell i
        apply predicted bbox deltas at cell i to each of the A anchors
      clip predicted boxes to image
      sort all (proposal, score) pairs by score from highest to lowest
      take top pre_nms_topN proposals before NMS
      apply NMS, take post_nms_topN proposals after NMS
      return the top proposals (RoIs top, scores top)

    Args:
        input: tuple of (rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key).

    Returns:
        Tensor (batch_size, post_nms_topN, 5): column 0 is the batch index,
        columns 1:5 the proposal box; rows beyond num_proposal stay zero.
    """
    # The first _num_anchors channels are bg probs; the second set are the
    # fg probs — keep only the fg scores.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]  # 'TRAIN' or 'TEST' selects the cfg section
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # NOTE(review): min_size is read but never used — the min-size filtering
    # below was disabled at some point.
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    batch_size = bbox_deltas.size(0)
    feat_height, feat_width = scores.size(2), scores.size(3)
    # Enumerate all feature-map cell offsets in input-image coordinates.
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()
    A = self._num_anchors  # anchors per cell
    K = shifts.size(0)     # number of cells
    self._anchors = self._anchors.type_as(scores)
    # Broadcast base anchors over every cell shift: (K, A, 4).
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    # Same story for the scores.
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)
    # Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    # Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info, batch_size)
    # Min-size filtering intentionally skipped (see min_size note above).
    scores_keep = scores
    proposals_keep = proposals
    # Sort all (proposal, score) pairs by score, highest first.
    _, order = torch.sort(scores_keep, 1, True)
    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]
        order_single = order[i]
        # Take top pre_nms_topN before NMS.
        # NOTE(review): compares against scores_keep.numel() (whole batch),
        # not scores_single.numel() — only equivalent when batch_size == 1.
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)
        # Apply NMS, then keep post_nms_topN proposals.
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]
        # Pad with zeros at the end; column 0 carries the batch index.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single
    return output
def interest(im2show, data, fpn, all_position, i, all_boxes, r_w, r_h, rat_w, rat_h):
    """Re-detect inside fixed-size crops centered on regions of interest.

    For each region in ``all_position``, crops a WIDTH x HIGHT window around
    the region center from ``data[0]``, runs ``fpn`` on the crop, maps the
    detections back into full-image coordinates and accumulates them into
    ``all_boxes[j][i]``.

    Relies on module-level globals: args, cfg, imdb, WIDTH, HIGHT, and the
    shared im_data / im_info / gt_boxes / num_boxes holder tensors.

    Returns:
        None. Returns early (None) if a crop degenerates to zero width/height.
        NOTE(review): the early return skips all remaining regions, not just
        the degenerate one — confirm that is intended (a `continue` would
        skip only the bad crop).
    """
    for key, value in all_position.items():
        # Region center in crop coordinate space.
        x = int(((value[2] - value[0]) / 2 + value[0]) * rat_w)
        y = int(((value[3] - value[1]) / 2 + value[1]) * rat_h)
        # Fixed-size window around the center (NCHW slicing).
        data_tem = data[0][:, :, y - int(HIGHT / 2):y + int(HIGHT / 2),
                           x - int(WIDTH / 2):x + int(WIDTH / 2)]
        w = len(data_tem[0][0][0])
        h = len(data_tem[0][0])
        print("INER", w, h)
        if w <= 0 or h <= 0:
            return None
        # Synthetic im_info / gt_boxes / num_boxes for the single crop.
        if args.cuda:
            data_tem1 = torch.from_numpy(np.array([[h, w, w / h]])).float().cuda()
            data_tem2 = torch.from_numpy(np.array([[1, 1, 1, 1, 1]])).float().cuda()
            data_tem3 = torch.from_numpy(np.array([1])).long().cuda()
        else:
            data_tem1 = torch.from_numpy(np.array([[h, w, w / h]])).float()
            data_tem2 = torch.from_numpy(np.array([[1, 1, 1, 1, 1]])).float()
            data_tem3 = torch.from_numpy(np.array([1])).long()
        im_data.data.resize_(data_tem.size()).copy_(data_tem)
        im_info.data.resize_(data_tem1.size()).copy_(data_tem1)
        gt_boxes.data.resize_(data_tem2.size()).copy_(data_tem2)
        num_boxes.data.resize_(data_tem3.size()).copy_(data_tem3)
        rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        # Drop the leading batch-index value; the rest is the box.
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                if args.class_agnostic:
                    if args.cuda:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        # FIX: the CPU branch previously still called .cuda() on
                        # the STDS tensor, which crashes on CPU-only machines.
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            pred_boxes = boxes
        # Undo the crop's scale factor.
        pred_boxes /= data_tem1[0][2]
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        for j in range(1, imdb.num_classes):  # iterate over every class
            inds = torch.nonzero(scores[:, j] > 0.6).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                # Sort the score list in descending order.
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                # Map crop-local coordinates back into the full image.
                for c in range(len(cls_boxes)):
                    cls_boxes[c][0] = (cls_boxes[c][0] + x - int(WIDTH / 2)) / rat_w
                    cls_boxes[c][1] = (cls_boxes[c][1] + y - int(HIGHT / 2)) / rat_h
                    cls_boxes[c][2] = (cls_boxes[c][2] + x - int(WIDTH / 2)) / rat_w
                    cls_boxes[c][3] = (cls_boxes[c][3] + y - int(HIGHT / 2)) / rat_h
                # Append the score column and reorder by descending score.
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                # Non-maximum suppression; keep the surviving rows.
                # NOTE(review): other call sites in this file pass a force-CPU
                # flag here (`not args.cuda`); this one passes args.cuda
                # directly — confirm which nms wrapper signature applies.
                keep = nms(cls_dets, cfg.TEST.NMS, args.cuda)
                cls_dets = cls_dets[keep.view(-1).long()]
                # FIX: was `all_boxes[j][i] == []`, which is an elementwise
                # (deprecated/ambiguous) comparison once the slot holds an
                # ndarray; len() == 0 is equivalent for both list and ndarray.
                if len(all_boxes[j][i]) == 0:
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = np.vstack(
                        (all_boxes[j][i], cls_dets.cpu().numpy()))
def forward(self, input):
    """FPN proposal layer: score + decode pyramid anchors into top RoIs.

    Unlike the single-level variant, anchors are generated across all
    pyramid levels at once and scores/deltas arrive already flattened per
    RoI.

    Algorithm:
      generate anchors for every pyramid level
      apply predicted bbox deltas to the anchors, clip to the image
      sort (proposal, score) pairs by score, take top pre_nms_topN
      apply NMS, take post_nms_topN
      return the top proposals

    Args:
        input: tuple of (cls_prob, bbox_deltas, im_info, cfg_key, feat_shapes).

    Returns:
        Tensor (batch_size, post_nms_topN, 5): column 0 is the batch index,
        columns 1:5 the proposal box; unused rows stay zero.
    """
    # Column 1 is presumably the foreground probability — confirm the
    # channel layout of input[0].
    scores = input[0][:, :, 1]  # batch_size x num_rois
    bbox_deltas = input[1]      # batch_size x num_rois x 4
    im_info = input[2]
    cfg_key = input[3]          # 'TRAIN' or 'TEST' selects the cfg section
    feat_shapes = input[4]
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # NOTE(review): min_size is read but never used — min-size filtering is
    # disabled here.
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    batch_size = bbox_deltas.size(0)
    # Anchors over every pyramid level, then broadcast across the batch.
    anchors = torch.from_numpy(
        generate_anchors_all_pyramids(
            self._fpn_scales, self._anchor_ratios, feat_shapes,
            self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
    num_anchors = anchors.size(0)
    anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)
    # Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    # Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info, batch_size)
    scores_keep = scores
    proposals_keep = proposals
    # Sort all (proposal, score) pairs by score, highest first.
    _, order = torch.sort(scores_keep, 1, True)
    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]
        # Take top pre_nms_topN before NMS.
        # NOTE(review): compares against scores_keep.numel() (whole batch),
        # not scores_single.numel() — only equivalent when batch_size == 1.
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)
        # Apply NMS, then keep post_nms_topN proposals.
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh, cfg.CUDA)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]
        # Pad with zeros at the end; column 0 carries the batch index.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single
    return output
def predict(cls, im_in):
    """Run single-image detection and return per-class detections + losses.

    Lazily instantiates the model on first call, builds a single-image
    blob, runs the network, decodes and NMS-filters the boxes, and returns
    detections above a 0.05 score threshold.

    Args:
        im_in (a PIL image / ndarray): RGB image, shape (H, W, 3).

    Returns:
        dict with:
          'pred': {class_name: [[x1, y1, x2, y2, score], ...], ...}
          'metrics': raw loss tensors returned by the model forward pass.
    """
    assert len(im_in.shape) == 3, "RGB images only"
    # Lazy one-time model construction (cached on the class).
    if cls.model is None:
        cls.model = cls.get_model()
    thresh = 0.05  # minimum score for a detection to be reported
    with torch.no_grad():
        blobs, im_scales = _get_image_blob(im_in)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        # NHWC -> NCHW for the network.
        im_data = Variable(
            torch.from_numpy(im_blob).permute(0, 3, 1, 2).cuda())
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
        im_info = Variable(torch.from_numpy(im_info_np).cuda())
        # Dummy GT inputs — inference only.
        gt_boxes = Variable(torch.zeros(1, 1, 5).cuda())
        num_boxes = Variable(torch.zeros(1).cuda())
        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = cls.model(
            im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop batch-index column
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Un-normalize targets by the precomputed mean and stdev.
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4 * len(cls.model.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class.
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the test-time image scaling.
        pred_boxes /= im_scales[0]
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        result = dict()
        for j in range(1, len(cls.model.classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                result[cls.model.classes[j]] = cls_dets.cpu().numpy().tolist()
    return {
        'pred': result,
        'metrics': {
            'rpn_loss_cls': rpn_loss_cls,
            'rpn_loss_box': rpn_loss_box,
            'RCNN_loss_cls': RCNN_loss_cls,
            'RCNN_loss_bbox': RCNN_loss_bbox,
            'rois_label': rois_label
        }
    }
def eval_result(args, logger, epoch, output_dir):
    """Evaluate one saved ThunderNet checkpoint over the validation imdb.

    Loads ``thundernet_epoch_{epoch}.pth`` from ``output_dir``, runs every
    validation image through the model, decodes + NMS-filters detections,
    dumps ``detections.pkl``, evaluates mAP@50 and logs it to tensorboard.

    Args:
        args: parsed CLI namespace (cuda, net, imdbval_name, data_root, ...).
        logger: tensorboard-style logger with add_image / add_scalar.
        epoch: checkpoint epoch number to load.
        output_dir: directory holding the checkpoint (also rebound below to
            the imdb output dir for detection results).
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    args.batch_size = 1  # evaluation is strictly one image per batch
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False, root_path=args.data_root)
    imdb.competition_mode(on=True)
    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch, ))
    # Net depth is encoded in the name, e.g. "snet_49" -> 49.
    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained_path=None,
                 class_agnostic=args.class_agnostic)
    _RCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        # Load all tensors onto the CPU.
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
    _RCNN.load_state_dict(checkpoint['model'])
    # Placeholder tensors; resized/copied from each batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        _RCNN.cuda()
    start = time.time()
    max_per_image = 100
    vis = True
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    save_name = 'thundernet'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, save_name)
    dataset = Detection(roidb,
                        num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE, cfg.PIXEL_MEANS),
                        training=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    _RCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        with torch.no_grad():
            # time_measure carries per-stage timings (RPN, pre-RoI, RoI, subnet).
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes,
                               )
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop batch-index column
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Un-normalize targets by the precomputed mean and stdev.
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class.
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo per-axis test-time scaling (x and y scales stored separately
        # in data[1][0][2] / data[1][0][3] — verify against the loader).
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                # NOTE(review): this nms call takes (boxes, scores, thresh) —
                # a different signature from the other nms call sites in this
                # file; confirm which wrapper is imported here.
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(),
                                   cls_dets.cpu().numpy(), 0.6)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write(
            'im_detect: {:d}/{:d}\tDetect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s)\tNMS: {:.3f}s\r' \
            .format(i + 1, num_images, detect_time, time_measure[0],
                    time_measure[1], time_measure[2], time_measure[3],
                    nms_time))
        sys.stdout.flush()
        # Log a sample visualization every 200 images.
        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]  # BGR -> RGB for the logger
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
def evaluation(name, net=None, vis=False, cuda=True, class_agnostic=False):
    """Run Faster R-CNN detection over the imdb `name` and evaluate the results.

    If `net` is None, a network is constructed from the module-level `args`
    (net type, load_dir, dataset) and a checkpoint is loaded from disk;
    otherwise the supplied model is evaluated directly.

    Returns whatever `imdb.evaluate_detections` returns (the mAP score,
    judging by the callers' usage — TODO confirm).
    """
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(name, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))
    if not net:
        # NOTE(review): this branch reads the module-level `args`, not a
        # parameter — calling evaluation(net=None) outside the CLI entry
        # point will fail if `args` is not defined.
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        # input_dir = 'weight'
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' + input_dir)
        # load_name = os.path.join(input_dir,
        #                          'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
        load_name = os.path.join(
            input_dir, 'faster_rcnn_{}_best.pth'.format(cfg['POOLING_MODE']))
        # initilize the network here.
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb.classes, pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb.classes, 101, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb.classes, 50, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb.classes, 152, pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
            pdb.set_trace()
        fasterRCNN.create_architecture()
        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('load model successfully!')
    else:
        fasterRCNN = net
    # initilize the tensor holder here.
    # These placeholder tensors are resized in-place for every batch so the
    # same storage is reused across the whole evaluation loop.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if cuda:
        cfg.CUDA = True
    if cuda:
        fasterRCNN.cuda()
    start = time.time()
    max_per_image = 100  # cap on detections kept per image across all classes
    # vis = args.vis
    # With visualization on, a higher score threshold keeps the drawing sparse.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    fasterRCNN.eval()
    # Placeholder for classes with no detections: shape (0, 5).
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop the leading batch-index column
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the image rescaling applied by the dataloader (scale factor
        # presumably stored at data[1][0][2] — TODO confirm against roibatchLoader).
        pred_boxes /= data[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + NMS (class 0 is skipped: background by
        # the usual Faster R-CNN convention — TODO confirm).
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                         \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    # NOTE(review): local name `map` shadows the builtin; kept as-is since it
    # is the documented return value of this function.
    map = imdb.evaluate_detections(all_boxes, output_dir)
    # print(map)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return map
def forward(self, input):
    """RPN proposal layer: turn per-anchor scores and deltas into RoIs.

    input is a tuple: (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key).

    Returns a (batch_size, post_nms_topN, 5) tensor whose rows are
    [batch_index, x1, y1, x2, y2], zero-padded when fewer than
    post_nms_topN proposals survive NMS.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs,
    # the second set are the fg probs: channels [0, A) are background
    # scores, channels [A, 2A) are foreground scores — only fg is kept.
    scores = input[0][:, self._num_anchors:, :, :]  # e.g. [1, 9, 53, 37]
    bbox_deltas = input[1]  # e.g. [1, 36, 53, 37]
    im_info = input[2]
    cfg_key = input[3]
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # proposals kept before NMS (e.g. 6000 at test time)
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # proposals kept after NMS (e.g. 300 at test time)
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # NMS IoU threshold (e.g. 0.7 at test time)
    min_size = cfg[cfg_key].RPN_MIN_SIZE             # min proposal width/height (e.g. 16); unused below (filtering is commented out)
    batch_size = bbox_deltas.size(0)
    feat_height, feat_width = scores.size(2), scores.size(3)  # e.g. 53, 37
    # Anchor-center offsets for every feature-map cell, in image pixels.
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # Stack (x, y, x, y) per cell so a shift applies to both box corners;
    # ravel + vstack + transpose yields a [K, 4] array (K = H*W cells).
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(),
                                         shift_y.ravel(),
                                         shift_x.ravel(),
                                         shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()
    A = self._num_anchors
    K = shifts.size(0)
    self._anchors = self._anchors.type_as(scores)  # [A, 4] base anchors
    # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous
    # Broadcast A base anchors over K cell shifts -> [K, A, 4].
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)  # e.g. [1, 17649, 4]

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors
    bbox_deltas = bbox_deltas.permute(0,2,3,1).contiguous()  # [1, 53, 37, 36]
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)        # [1, 17649, 4]

    # Same story for the scores
    scores = scores.permute(0,2,3,1).contiguous()
    scores = scores.view(batch_size, -1)  # [1, 17649]

    # Convert anchors into proposals via bbox transformations: apply the
    # predicted deltas and get (x1, y1, x2, y2) corner boxes.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # Clip predicted boxes to the image: coordinates outside the image are
    # snapped to the border.
    proposals = clip_boxes(proposals, im_info, batch_size)
    # proposals = clip_boxes_batch(proposals, im_info, batch_size)

    # assign the score to 0 if it's non keep.
    # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
    # trim keep index to make it euqal over batch
    # keep_idx = torch.cat(tuple(keep_idx), 0)
    # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
    # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
    # _, order = torch.sort(scores_keep, 1, True)

    scores_keep = scores
    proposals_keep = proposals
    # order holds per-batch indices of scores sorted descending.
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]  # [K*A, 4]
        scores_single = scores_keep[i]        # [K*A]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        # Re-index proposals/scores by descending score; indices below now
        # refer to this truncated, sorted view.
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1,1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        # NOTE(review): this call passes a [N, 5] boxes+scores tensor and a
        # force_cpu kwarg — a legacy custom-nms signature, unlike the
        # (boxes, scores, thresh) form used elsewhere in this file; confirm
        # which nms implementation is imported here.
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end: rows beyond num_proposal stay all-zero.
        num_proposal = proposals_single.size(0)
        output[i,:,0] = i
        output[i,:num_proposal,1:] = proposals_single
    return output
def val_batch(batch_num, batch):
    """Validate one batch: run object + scene-graph detection and score relations.

    Uses the module-level `spinn`, `obj_detector`, `scene_detector`, `top_Ns`
    and `val` objects. Returns (rel_cnt, rel_corrent_cnt) from
    eval_relations_recall — total and correctly-recalled relation counts,
    presumably (TODO confirm against eval_relations_recall).
    """
    with torch.no_grad():
        # Phrase features: encode with SPINN when enabled, otherwise pass raw.
        if using_spinn:
            spinn_res = spinn(batch.phrases)
        else:
            spinn_res = batch.phrases
        result = obj_detector(batch.imgs, batch.im_sizes, batch.gt_boxes,
                              use_gt_boxes=True)
        rois, bbox_pred, cls_prob, rois_label, rpn_label, pooled_feat = result
        # scene detection part
        res_scene = scene_detector(rois.data, bbox_pred, batch.im_sizes,
                                   cls_prob, pooled_feat, spinn_res,
                                   rois_label, batch.gt_boxes, batch.gt_rels,
                                   use_gt_boxes=True)
        rois, roi_pair_proposals, obj_cls_prob, rel_cls_prob, \
            roi_rel_pairs_score, _, _ = res_scene
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop the leading batch-index column
        # Undo the dataloader's image rescale (factor presumably stored at
        # im_sizes[0][0][2] — TODO confirm).
        scale = batch.im_sizes[0][0][2]
        pred_boxes = boxes / scale
        pred_boxes = pred_boxes.squeeze()
        # NOTE(review): hard-coded True makes the bbox-regression branch
        # below dead code; it is kept for when GT boxes are not used.
        use_gt_boxes = True
        if not use_gt_boxes:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4)
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, batch.im_sizes, 1)
        scores = scores.squeeze()
        theimg = cv2.imread(batch.im_fn[0])
        # (Large block of commented-out matplotlib/OpenCV debugging code
        # removed: it drew GT boxes, foreground RPN rois, and per-class
        # Faster R-CNN detections on the image for visual inspection.
        # Recover it from version control if visualization is needed.)
        im2show = np.copy(theimg)
        im = theimg.copy()
        gt_boxes = batch.gt_boxes.data[0]
        rel_cnt, rel_corrent_cnt, gt_rel_rois, gt_rel_labels = \
            eval_relations_recall(im, gt_boxes, scale, batch.gt_rels.data[0],
                                  pred_boxes.data,
                                  obj_cls_prob.data[0],
                                  roi_pair_proposals.view(-1, 2),
                                  rel_cls_prob.data[0], top_Ns,
                                  roi_rel_pairs_score, val, vis=False)
        return rel_cnt, rel_corrent_cnt
def forward(self, rois, bbox_pred, im_info, obj_cls_score, obj_cls_feat,
            spinn_res, rois_obj_label, gt_boxes, gt_relation,
            use_gt_boxes=False):
    """Scene-graph head: propose object pairs (RELPN) and classify objects
    and relations with a GCN.

    Training returns (rois, obj_prob, rel_prob, obj_cls_loss, relpn_loss,
    grcnn_loss, relpn_eval); inference returns (rois, roi_pair_proposals,
    obj_prob, rel_prob, pair_scores, 0, 0). With cfg.HAS_RELATIONS off,
    returns (relpn_eval, relpn_loss_cls).
    """
    batch_size = rois.size(0)
    num_boxes = min(gt_boxes.size(0), cfg.MAX_NUM_GT_BOXES)
    # batch normalization (disabled — feature passed through unchanged)
    _obj_cls_feat = obj_cls_feat  # self.bn_obj(obj_cls_feat)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED and not use_gt_boxes:
        box_deltas = bbox_pred.data
        # conversly normalize targets by a precomputed mean and stdev;
        # this is done in RCNN_proposal_target (i.e. this undoes that
        # normalization before decoding boxes).
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(bbox_pred.size(0), -1, 4)
        boxes = rois.data[:, :, 1:5]
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info, 1)
        # Refine the rois in place with the regressed boxes.
        rois.data[:, :, 1:5] = pred_boxes
    if spinn_res is not None:
        # todo: when the spinn_res are all zero matrix
        encoder_res = self.encoder(spinn_res)
        self.res_spinn.append(encoder_res)
    else:
        encoder_res = None
    # Reshape flat per-roi features to (batch, rois_per_image, feat_dim).
    relpn_feats = _obj_cls_feat.view(
        rois.size(0), rois.size(1), _obj_cls_feat.size(1))
    # todo: last size wether 1 or 2
    roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls, relpn_eval = \
        self.RELPN(rois.data, relpn_feats, encoder_res, im_info,
                   gt_boxes.data, gt_relation.data, num_boxes, use_gt_boxes)
    if not self.training:
        if batch_size == 1:
            # Drop all-zero (padding) pairs at inference time.
            valid = roi_rel_pairs.sum(2).view(-1).nonzero().view(-1)
            roi_rel_pairs = roi_rel_pairs[:, valid, :]
            roi_pair_proposals = roi_pair_proposals[:, valid, :]
            roi_rel_pairs_score = roi_rel_pairs_score[:, valid, :]
    # Offset per-batch pair indices so they index into the flat
    # (batch*rois, feat) feature tensor.
    size_per_batch = _obj_cls_feat.size(0) / batch_size
    # xxx = torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals) * size_per_batch
    roi_pair_proposals = roi_pair_proposals + \
        torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals) * size_per_batch
    roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
    ind_subject = roi_pair_proposals_v[:, 0]
    ind_object = roi_pair_proposals_v[:, 1]
    if self.training:
        # Sample positive/negative relation pairs against the GT relations.
        roi_rel_pairs, rois_rel_label, roi_pair_keep = \
            self.RELPN_proposal_target(roi_rel_pairs, gt_boxes.data,
                                       gt_relation.data, num_boxes)
        rois_rel_label = Variable(rois_rel_label.view(-1))
        # Offset kept-pair indices into the flat pair list (per batch item).
        xxx = torch.arange(0, roi_pair_keep.size(0)).view(
            roi_pair_keep.size(0), 1).cuda() * roi_pair_proposals_v.size(0)
        x = xxx / batch_size
        # roi_pair_keep = roi_pair_keep + torch.arange(0, roi_pair_keep.size(0)).view(roi_pair_keep.size(0), 1).cuda() \
        #                 * roi_pair_proposals_v.size(0) / batch_size
        roi_pair_keep = roi_pair_keep + x.float()
        roi_pair_keep = roi_pair_keep.view(-1).long()
        ind_subject = roi_pair_proposals_v[roi_pair_keep][:, 0]
        ind_object = roi_pair_proposals_v[roi_pair_keep][:, 1]
    _obj_cls_feat_sub = self.gcn_head_rel_sub(_obj_cls_feat)
    x_sobj = _obj_cls_feat_sub[ind_subject]  # e.g. 1500 x 4096
    x_oobj = _obj_cls_feat_sub[ind_object]
    # Relation feature = concatenated subject/object features.
    pred_feat = torch.cat((x_sobj, x_oobj), 1)
    # compute object classification probability
    # pred_feat = self.gcn_head_rel_fc(_pred_feat)
    # pred_feat = self.bn_rel(pred_feat)
    ## ========================================================================
    ## GCN
    ## ========================================================================
    if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:  # true
        x_obj_gcn, x_pred_gcn = self.GRCNN_gcn_feat(
            _obj_cls_feat, pred_feat, ind_subject, ind_object)
        # (Large block of commented-out legacy code removed here: an
        # attention-weighted obj-obj / obj-rel adjacency construction and an
        # iterative multi-layer GCN refinement loop. Recover from version
        # control if the attention-based message passing is revived.)
    # NOTE(review): if cfg.GCN_ON_FEATS/GCN_LAYERS are disabled, x_obj_gcn /
    # x_pred_gcn are undefined below — this path assumes the GCN is on.
    # compute object classification loss
    gcn_obj_cls_score = self.GRCNN_obj_cls_score(x_obj_gcn)
    gcn_obj_cls_prob = F.softmax(gcn_obj_cls_score, 1)
    gcn_rel_cls_score = self.GRCNN_rel_cls_score(x_pred_gcn)
    gcn_rel_cls_prob = F.softmax(gcn_rel_cls_score, dim=1)
    ## ========================================================================
    ## LOSS function
    ## ========================================================================
    if self.training:
        if cfg.GCN_LAYERS > 0:
            # object classification los
            self.GRCNN_loss_obj_cls = F.cross_entropy(
                gcn_obj_cls_score, rois_obj_label.long())
            # relation classification los — track fg/bg counts for logging
            self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
            self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt
            self.GRCNN_loss_rel_cls = F.cross_entropy(
                gcn_rel_cls_score, rois_rel_label.long())
            grcnn_loss = self.GRCNN_loss_obj_cls + self.GRCNN_loss_rel_cls
        # used only for rpn relpn training
        relpn_loss = relpn_loss_cls
    # Reshape flat per-roi / per-pair probabilities back to batched form.
    gcn_obj_cls_prob = gcn_obj_cls_prob.view(batch_size, rois.size(1), -1)
    gcn_rel_cls_prob = gcn_rel_cls_prob.view(
        batch_size, int(gcn_rel_cls_prob.size(0) / batch_size), -1)
    ## ========================================================================
    ## Return Values
    ## ========================================================================
    if cfg.HAS_RELATIONS:
        if self.training:  # true use this option
            return rois, gcn_obj_cls_prob, gcn_rel_cls_prob, \
                self.GRCNN_loss_obj_cls, relpn_loss, grcnn_loss, relpn_eval
            # return rois, bbox_pred_frcnn, obj_cls_prob_frcnn, att_cls_prob, rel_cls_prob, rpn_loss, relpn_loss, grcnn_loss
        else:
            return rois, roi_pair_proposals, gcn_obj_cls_prob, \
                gcn_rel_cls_prob, roi_rel_pairs_score, 0, 0
    else:
        return relpn_eval, relpn_loss_cls
def validate_virat(val_loader, S_RAD, epoch, num_class, num_segments, vis,
                   session, batch_size, input_data, cfg, log, dataset):
    """Validate the S_RAD detector on VIRAT-style data and log per-class AP.

    input_data supplies the reusable (im_data, im_info, num_boxes, gt_boxes)
    holder tensors that are resized in-place per batch. Results are matched
    against GT via match_dt_gt / aggregate_eval and written to `log`.
    """
    val_iters_per_epoch = int(np.round(len(val_loader)))
    im_data, im_info, num_boxes, gt_boxes = input_data
    S_RAD.eval()
    # all_boxes[step][image][class] = N x 5 detections [x1, y1, x2, y2, score]
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    # limit the number of proposal per image across all the class
    max_per_image = cfg.MAX_DET_IMG
    # dict with matched detections and its score @class_idx
    eval_target = {one: 1 for one in activity2id_person}
    e = {one: {} for one in eval_target}  # cat_id -> imgid -> {"dm","dscores"}
    # unique image id
    imgid = 0
    num_gt = [0 for _ in range(num_class)]
    for step, data in enumerate(val_loader):
        im_data.resize_(data[0].size()).copy_(data[0])
        gt_boxes.resize_(data[1].size()).copy_(data[1])
        num_boxes.resize_(data[2].size()).copy_(data[2])
        im_info.resize_(data[3].size()).copy_(data[3])
        # Flatten (batch, segments, ...) into one frame dimension.
        im_data = im_data.view(-1, im_data.size(2), im_data.size(3),
                               im_data.size(4))
        im_info = im_info.view(-1, 3)
        gt_boxes = gt_boxes.view(-1, cfg.MAX_NUM_GT_BOXES, num_class + 4)
        num_boxes = num_boxes.view(-1)
        # evaluate /inference cpde
        start = time.time()
        rois, cls_prob, bbox_pred = S_RAD(im_data, im_info, gt_boxes,
                                          num_boxes)
        torch.cuda.synchronize()
        end_time = time.time() - start  # inference wall time (unused below)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # batch_size = rois.shape[0]
        # Un-normalize the regression deltas (means/stds precomputed in cfg).
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)
        # transforms the image to x1,y1,x2,y2, format and clips the coord to images
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])
        # gt boxes: first 4 columns are coords, the rest are one-hot labels
        # (presumably — TODO confirm against the dataloader).
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]
        # pred_boxes /= data[3][0][1][2].item()
        # gtbb /= data[3][0][1][2].item()
        # move the groudtruth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        # count = 0
        for image in range(pred_boxes.shape[0]):
            box = [None for _ in range(num_class)]
            imgid += 1
            for class_id in range(1, num_class):
                inds = torch.nonzero(
                    scores[image, :, class_id] > cfg.VIRAT.SCORE_THRES).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    # arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()
                # collect groud truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]
                # take groundtruth box only if the label =1 for that class
                box[class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += np.sum(len(box[class_id]))
            match_dt_gt(e, imgid, all_boxes[step][image], box,
                        activity2id_person)
        if (step + 1) % 50 == 0:
            output = ('Test: [{0}/{1}]\t'.format(step, (val_iters_per_epoch)))
            print(output)
    aps = aggregate_eval(e, maxDet=max_per_image)
    mAP = (mean(aps[target] for target in aps.keys()))
    for k, v in aps.items():
        output = ('class: [{0}] - {1}'.format(k, v))
        log.write(output + '\n')
        print(output)
    mAPout = ('mAP at epoch {0}: {1}'.format(epoch, mAP))
    print('mAP at epoch {0}: {1} \n'.format(epoch, mAP))
    log.write(mAPout + '\n')
    log.flush()
def validate_voc(val_loader, S_RAD, epoch, num_class, num_segments, session,
                 batch_size, cfg, log, dataset, pathway, eval_metrics):
    """Validate S_RAD with VOC-style AP: per-class tp/fp, PR curve, mean AP.

    `pathway` selects how im_info/gt_boxes are unpacked from the batch
    ("two_pathway" nests them one level deeper). When `eval_metrics` is set,
    a confusion matrix and PR curves are also produced. Results go to `log`.
    """
    val_iters_per_epoch = int(np.round(len(val_loader)))
    S_RAD.eval()
    # all_boxes[step][image][class] = detections; bbox[...] = GT boxes
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    bbox = [[[[] for _ in range(num_class)]
             for _ in range(batch_size * num_segments)]
            for _ in range(val_iters_per_epoch)]
    # limit the number of proposal per image across all the class
    max_per_image = cfg.MAX_DET_IMG
    # confusion matrix
    conf_mat = ConfusionMatrix(num_classes=num_class, CONF_THRESHOLD=0.8,
                               IOU_THRESHOLD=0.2, dataset=dataset)
    num_gt = [0 for _ in range(num_class)]
    # data_iter = iter(val_loader)
    for step, data in enumerate(val_loader):
        # evaluate /inference code
        # start_time = time.time()
        rois, cls_prob, bbox_pred = S_RAD(data)
        # torch.cuda.synchronize()
        # end_time = time.time() - start_time
        # Map dataset name to its class dictionary (used for reporting only).
        # NOTE(review): an unknown dataset leaves class_dict unbound.
        if dataset == 'ucfsport':
            class_dict = act2id
        elif dataset == 'jhmdb':
            class_dict = jhmdbact2id
        elif dataset == 'ucf24':
            class_dict = ucf24act2id
        elif dataset == 'urfall':
            class_dict = fallactivity2id
        elif dataset == 'imfd':
            class_dict = imfallactivity2id
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # Un-normalize the regression deltas (means/stds precomputed in cfg).
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)
        # transforms the image to x1,y1,x2,y2, format and clips the coord to images
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        if pathway == "two_pathway":
            im_info = data[0][3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[0][1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                        num_class + 4)).to(device="cuda")
        else:
            im_info = data[3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                     num_class + 4)).to(device="cuda")
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])
        # gt boxes: first 4 columns are coords, the rest are one-hot labels
        # (presumably — TODO confirm against the dataloader).
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]
        # move the groudtruth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        # count = 0
        for image in range(pred_boxes.shape[0]):
            for class_id in range(1, num_class):
                inds = torch.nonzero(scores[image, :, class_id] > 0).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    # arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()
                # collect groud truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]
                # take groundtruth box only if the label =1 for that class
                bbox[step][image][class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += np.sum(len(bbox[step][image][class_id]))
                if eval_metrics:
                    if len(bbox[step][image][class_id]) > 0 and len(
                            all_boxes[step][image][class_id]) > 0:
                        conf_mat.process_batch(all_boxes[step][image],
                                               bbox[step][image])
    if eval_metrics:
        result = conf_mat.return_matrix()
        print(result)
        conf_mat.plot(result)
    ap = [None for _ in range(num_class)]
    # calculate fp anf tp for each detections
    for cls_id in range(1, num_class):
        tpfp = []
        class_det = []
        for video in range(len(all_boxes)):
            for batch in range(len(all_boxes[0])):
                tp_fp = (tpfp_default(all_boxes[video][batch][cls_id],\
                                      bbox[video][batch][cls_id], iou_thr=0.5))
                if (len(tp_fp) > 0 and len(all_boxes[video][batch][cls_id]) > 0):
                    tpfp.append(tp_fp)
                    class_det.append(all_boxes[video][batch][cls_id])
        assert len(tpfp) == len(class_det)
        tp, fp = tuple(zip(*tpfp))
        # sort all det bboxes by score, also sort tp and fp
        cls_det = np.vstack(class_det)
        num_dets = cls_det.shape[0]  # NOTE(review): assigned but unused
        sort_inds = np.argsort(-cls_det[:, -1])
        tp = np.hstack(tp)[:, sort_inds]
        fp = np.hstack(fp)[:, sort_inds]
        # calculate recall and precision with tp and fp
        tp = np.cumsum(tp, axis=1)
        fp = np.cumsum(fp, axis=1)
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gt[cls_id], eps)
        precisions = tp / np.maximum((tp + fp), eps)
        # ROC curve visualisation
        if eval_metrics:
            import matplotlib.pyplot as plt
            # NOTE(review): 'ac' is not a valid matplotlib color name; this
            # will raise if cls_id == 0 is ever plotted — confirm intent.
            colors = [
                'ac', 'navy', 'gold', 'turquoise', 'red', 'green', 'black',
                'brown', 'darkorange', 'cornflowerblue', 'teal'
            ]
            plt.plot(recalls[0, :], precisions[0, :], color=colors[cls_id],
                     lw=2, label='class {}'.format(cls_id))
        ap[cls_id] = average_precision(recalls[0, :], precisions[0, :],
                                       mode='area')
    # Plot ROC Curve
    if eval_metrics:
        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="best")
        plt.show()
    for k, v in class_dict.items():
        # print("Average precision per class:")
        out = ("class [{0}]:{1} |gt:{2}".format(k, ap[v], num_gt[v]))
        print(out)
        log.write(out + '\n')
    mAP = ("mAP for epoch [{0}] is : {1}".format(epoch, mean(ap[1:])))
    print(mAP)
    log.write(mAP + '\n')
    log.flush()
    print("----------------------------------------------")
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    """Run the detector forward pass.

    Computes RPN proposals, ROI-pooled head features, sampled bbox
    regressions (reparameterization trick: the bbox head predicts
    [log_sigma^2 | mu] per box), class probabilities, and — in training —
    the standard RPN/RCNN losses plus entropy-style "prior" losses on the
    RPN foreground scores and on the post-NMS head scores.

    Args:
        im_data:   image batch tensor fed to ``self.RCNN_base``
                   (assumed [batch, C, H, W] — TODO confirm against caller).
        im_info:   per-image metadata; ``im_info[:, 2]`` is used as a scale
                   factor when normalizing predicted boxes.
        gt_boxes:  ground-truth boxes; last channel holds the class label.
        num_boxes: number of valid ground-truth boxes per image.

    Returns:
        12-tuple: (rois, cls_prob, bbox_pred,
                   rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox,
                   rois_label, rpn_prior_loss, rpn_reg_loss,
                   head_prior_loss, head_reg_loss)
    """
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # Feed image data to base model to obtain the base feature map.
    base_feat = self.RCNN_base(im_data)

    # Feed the base feature map to the RPN to obtain rois.
    # fg_scores / rpn_reg_loss are extras of this RPN variant beyond the
    # stock Faster-RCNN interface.
    rois, rpn_loss_cls, rpn_loss_bbox, fg_scores, rpn_reg_loss = \
        self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    rpn_prior_loss = torch.FloatTensor([0.]).cuda()

    # In the training phase, use ground-truth bboxes for refining.
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        if self.rpn_prior_weight != 0.:
            # Negative entropy of the normalized RPN foreground scores,
            # averaged per ground-truth box, then over the batch.
            for i in range(batch_size):
                gt_num = num_boxes[i].detach().cpu().item()
                if gt_num <= 0:
                    # FIX: an image without ground truth previously caused a
                    # divide-by-zero (inf loss); skip it instead.
                    continue
                score = fg_scores[i]
                score_sum = score.sum().detach().cpu().item()
                score = score / score_sum
                log_score = score * torch.log(score + 1e-6)  # p * log(p)
                rpn_prior_loss += (-1. * log_score.sum() / float(gt_num))
            rpn_prior_loss /= batch_size
            rpn_prior_loss *= self.rpn_prior_weight
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = torch.FloatTensor([0.]).cuda()
        rpn_loss_bbox = torch.FloatTensor([0.]).cuda()

    rois = Variable(rois)

    # Do roi pooling based on predicted rois.
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
    else:
        # FIX: previously fell through and crashed later with an unbound
        # `pooled_feat` NameError; fail fast with a clear message instead.
        raise ValueError(
            "Unsupported cfg.POOLING_MODE: {}".format(cfg.POOLING_MODE))

    # Feed pooled features to the top model.
    pooled_feat = self._head_to_tail(pooled_feat)

    # Optional L2 activity regularizer on the head features.
    head_reg_loss = torch.FloatTensor([0.]).cuda()
    if self.training and self.head_reg_weight != 0.:
        head_reg_loss = (pooled_feat**2).mean() * self.head_reg_weight

    # Compute bbox offset. The head emits [log_sigma^2 | mu] (4 + rest),
    # and the regression target is sampled via the reparameterization trick:
    # delta = eps * sigma * sample_sigma + mu.
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    normal_dist = torch.randn(bbox_pred.size(0), 4).float().cuda()
    log_sigma_2 = bbox_pred[:, :4]
    miu = bbox_pred[:, 4:]
    sigma = torch.exp(log_sigma_2 / 2.)
    sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
    bbox_pred = sample_loc_data

    if self.training and not self.class_agnostic:
        # Select the regression columns corresponding to each roi's label.
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                        int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1,
                            1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # Compute object classification probability.
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = torch.FloatTensor([0.]).cuda()
    RCNN_loss_bbox = torch.FloatTensor([0.]).cuda()
    if self.training:
        # Classification loss.
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # Bounding-box regression smooth-L1 loss.
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    head_prior_loss = torch.FloatTensor([0.]).cuda()
    if self.training and self.head_prior_weight != 0.:
        # `scores` is detached (.data) for ranking/NMS; `scores_gradient`
        # keeps the graph so the entropy loss backpropagates.
        scores = cls_prob.data  # [batch, num_rois, classes]
        scores_gradient = cls_prob  # [batch, num_rois, classes]
        boxes = rois.data[:, :, 1:5]  # [batch, num_rois, 4]

        if cfg.TRAIN.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data  # [batch, num_rois, 4]
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally un-normalize targets by precomputed mean/stdev.
                if self.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1,
                                                 4 * len(self.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
        else:
            # Simply repeat the boxes, once for each class.
            print("no use bbox head in IB")
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_info[:, 2].data[:, None, None]  # [batch, num_rois, 4]

        loss_count = 0.
        gt_classes = gt_boxes[:, :, -1].data  # [batch, num (0-padded)]
        for i in range(batch_size):
            for j in range(1, len(self.classes)):  # skip background class
                if not (gt_classes[i] == j).any():
                    continue  # no such class in gt
                inds = torch.nonzero(
                    scores[i, :, j] > self.nms_threshold).view(-1)
                if inds.numel() == 0:
                    continue
                cls_scores = scores[i, :, j][inds]  # [num]
                cls_scores_gradient = scores_gradient[i, :, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[i, inds, :]  # [num, 4]
                else:
                    cls_boxes = pred_boxes[i, inds][:, j * 4:(j + 1) * 4]
                cls_scores_gradient = cls_scores_gradient[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                score = cls_scores_gradient[keep.view(-1).long()]  # [num_keep]
                gt_num = (gt_classes[i] == j).sum().detach().cpu().item()
                # Only penalize when there are more detections than gt boxes.
                if score.size(0) <= gt_num:
                    continue
                score_sum = score.sum().detach().cpu().item()
                score = score / score_sum
                log_score = score * torch.log(score + 1e-6)
                head_prior_loss += (-1. * log_score.sum() / float(gt_num))
                loss_count += 1.
        # FIX: guard against loss_count == 0 (no class survived the
        # filters); the original 0/0 division produced NaN and poisoned
        # the whole training loss.
        if loss_count > 0.:
            head_prior_loss /= loss_count
        head_prior_loss *= self.head_prior_weight

    return rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
        rpn_prior_loss, rpn_reg_loss, head_prior_loss, head_reg_loss