# NOTE(review): this top-level snippet mixes two styles — it reads `args.*`
# and builds a COCO imdb via combined_roidb, while the functions below use
# `cfg.*` and PASCAL_VOC; presumably leftover driver code from another
# script — confirm which path is actually live.

# Derive the config path from the chosen backbone/net name.
args.cfg_file = "cfgs/{}.yml".format(args.net)
# NOTE(review): args.cfg_file was just assigned above, so this check is
# always true as written.
if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)
print('Using config:')
pprint.pprint(cfg)
if torch.cuda.is_available() and not cfg.CUDA:
    print("Warning: You have a CUDA device, so you should run on it")
# Build the COCO minival roidb used for evaluation.
imdbval_name = "coco_2014_minival"
imdb, roidb = combined_roidb(imdbval_name, False)
# NOTE(review): fasterRCNN is only created for 'res101'; any other
# args.net value makes create_architecture() below fail with NameError.
if args.net == 'res101':
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True)
fasterRCNN.create_architecture()
# Load trained weights from the path given on the command line.
checkpoint = torch.load(args.models)
fasterRCNN.load_state_dict(checkpoint['model'])
print("Load model from %s" % (args.models))
if args.gpu:
    fasterRCNN.cuda()
fasterRCNN.eval()
# Detection post-processing limits: cap on detections per image and the
# minimum score for a box to be kept.
max_per_image = 100
thresh = 0.05
# vis = True
# Enumerate the input images in the directory given by args.image.
imglist = os.listdir(args.image)
num_images = len(imglist)
print('Loaded Photo: {} images.'.format(num_images))
def inference(_test_img_path, _check_point, _score_threshold=0.3, class_agnostic=False):
    """Run Faster R-CNN detection on a single image and display the result.

    Args:
        _test_img_path: path of the image to run detection on.
        _check_point: path to a checkpoint containing 'model_state_dict'.
        _score_threshold: minimum class score for a detection to be kept.
        class_agnostic: if True, the bbox regressor predicts one box per
            RoI instead of one box per class.

    Side effects: loads the model, prints timing, and shows the annotated
    image in an OpenCV window (blocks on cv2.waitKey).
    """
    test_img_path = _test_img_path
    check_point = _check_point
    score_threshold = _score_threshold
    # BUG FIX: "cuda: 0" (with a space) is not a valid torch device string
    # and raises RuntimeError; the correct spelling is "cuda:0".
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    fasterRCNN = resnet(cfg.backbone,
                        is_training=False,
                        pretrained=False,
                        class_agnostic=class_agnostic)
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (check_point))
    checkpoint = torch.load(check_point)
    fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
    print('load model successfully!')
    fasterRCNN.eval()
    fasterRCNN.to(device)
    # Reusable input buffers, resized/copied in place below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    im_data = im_data.cuda()
    # BUG FIX: the original read `im_info = im_data.cuda()`, aliasing
    # im_info to the SAME tensor as im_data, so the two resize_/copy_
    # calls below clobbered each other.
    im_info = im_info.cuda()
    start_time = time.time()
    test_img = cv2.imread(test_img_path)
    test_img_copy = copy.deepcopy(test_img)
    test_img_copy, scale = image_preprocess(test_img_copy)
    test_img_copy = torch.from_numpy(test_img_copy)
    # NOTE(review): assumes image_preprocess returns a (1, C, H, W) array,
    # so size(2)/size(3) are height/width — confirm against image_preprocess.
    im_info_tensor = torch.Tensor(
        [[[test_img_copy.size(2), test_img_copy.size(3)]]])
    im_data.resize_(test_img_copy.shape).copy_(test_img_copy)
    im_info.resize_(im_info_tensor.shape).copy_(im_info_tensor)
    rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(im_data, None)  # without gt
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data
    if cfg.bbox_normalize_targets_precomputed:
        # Undo the bbox-target normalization applied during training.
        if class_agnostic:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
            print(box_deltas.size())
            box_deltas = box_deltas.view(1, -1, 4 * len(cfg.class_to_ind))
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, (im_data.size(2), im_data.size(3)), 1)
    # Map boxes back to the original (un-resized) image coordinates.
    pred_boxes = pred_boxes / scale
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    # Per-class thresholding + NMS; class 0 is background and is skipped.
    for j in range(1, len(cfg.class_to_ind)):
        inds = torch.nonzero(scores[:, j] > score_threshold).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = cls_boxes[order]
            cls_scores = cls_scores[order]
            keep = nms(cls_dets, cls_scores, cfg.test_nms_threshold)
            cls_dets = cls_dets[keep.view(-1).long()]  # boxes kept for this class
            cls_scores = cls_scores[keep.view(-1).long()]
            test_img = draw_target(test_img, cls_dets, cls_scores, j)
    end_time = time.time()
    print('detect time:{}s'.format(end_time - start_time))
    cv2.imshow('result', test_img)
    cv2.waitKey(0)
def train():
    """Train Faster R-CNN on PASCAL VOC trainval, saving checkpoints to cfg.work_dir.

    Reads all hyperparameters from the module-level `cfg`; optionally
    resumes from `cfg.load_from`.
    """
    np.random.seed(cfg.rng_seed)
    if not os.path.exists(cfg.work_dir):
        os.makedirs(cfg.work_dir)
    train_set = PASCAL_VOC(cfg.trainset_root_path, 'trainval')
    dataloader = DataLoader(train_set,
                            batch_size=cfg.batch_size,
                            shuffle=True,
                            num_workers=4)
    iters_per_epoch = len(train_set) // cfg.batch_size
    # BUG FIX: "cuda: 0" (with a space) is not a valid torch device string.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    fasterRCNN = resnet(cfg.backbone, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()
    optimizer = torch.optim.SGD(fasterRCNN.parameters(),
                                lr=cfg.learning_rate,
                                momentum=0.9,
                                weight_decay=5e-4)
    fasterRCNN.to(device)
    # Reusable input buffers; resized and filled in place every step.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ROBUSTNESS: .to(device) instead of unconditional .cuda(), so the
    # CPU fallback chosen above actually works on CUDA-less machines.
    im_data = im_data.to(device)
    im_info = im_info.to(device)
    gt_boxes = gt_boxes.to(device)
    # make variable (Variable is a no-op wrapper on modern PyTorch)
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    gt_boxes = Variable(gt_boxes)
    start_epoch = 0
    # Optionally resume model/optimizer state from a checkpoint.
    if cfg.load_from is not None:
        checkpoint = torch.load(cfg.load_from)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
    for ep in range(start_epoch, cfg.epoch):
        fasterRCNN.train()
        # Decay the learning rate every lr_decay_step epochs.
        if ep != 0 and (ep + 1) % cfg.lr_decay_step == 0:
            cur_lr = get_learing_rate(optimizer)
            adjust_learning_rate(optimizer, cur_lr * cfg.lr_decay)
        for step, batch_data in enumerate(dataloader):
            with torch.no_grad():
                im_data.resize_(batch_data['image'].size()).copy_(
                    batch_data['image'])
                gt_boxes.resize_(batch_data['gt_boxes'].size()).copy_(
                    batch_data['gt_boxes'])
                im_info.resize_(batch_data['im_info'].size()).copy_(
                    batch_data['im_info'])
            fasterRCNN.zero_grad()
            print('[epoch:{}/{}], [step {}/{}]'.format(ep + 1, cfg.epoch,
                                                       step + 1,
                                                       iters_per_epoch))
            rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                roi_labels = fasterRCNN(im_data, gt_boxes, im_info)
            loss = rpn_loss_cls.mean() + rpn_loss_bbox.mean(
            ) + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            optimizer.zero_grad()
            loss.backward()
            if cfg.clip_grad:
                # Clip each gradient's norm to guard against exploding gradients.
                clip_grad_norm(fasterRCNN.parameters(), 10)
            optimizer.step()
            cur_lr = get_learing_rate(optimizer)
            print(
                'loss:{:.5f}, lr:{}, rpn cls loss:{:.5f}, rpn bbox loss:{:.5f}, rcnn cls loss:{:.5f}, rcnn bbox loss:{:.5f}'
                .format(loss.item(), cur_lr, rpn_loss_cls.item(),
                        rpn_loss_bbox.item(), RCNN_loss_cls.item(),
                        RCNN_loss_bbox.item()))
            print('cls_prob:', cls_prob)
        # Save a checkpoint after every checkpoint_interval-th epoch.
        # BUG FIX: the original condition `ep % (cfg.checkpoint_interval + 1) == 0`
        # ignored the configured interval (it saved at epochs 0, interval+1,
        # 2*(interval+1), ...).
        if (ep + 1) % cfg.checkpoint_interval == 0:
            state = {
                # BUG FIX: store ep + 1 (the next epoch to run) so that a
                # resumed run does not re-train the epoch just completed;
                # this also matches the `str(ep + 1)` used in the filename.
                'epoch': ep + 1,
                'model_state_dict': fasterRCNN.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }
            save_path = os.path.join(
                cfg.work_dir,
                cfg.checkpoint_name + '-' + str(ep + 1) + '.pth')
            torch.save(state, save_path)
def evalue(check_point,
           cache_path='./result.pkl',
           class_agnostic=False,
           ovthresh=0.5,
           use_07_metric=False):
    """Evaluate a checkpoint on the VOC test split; print per-class AP and mAP.

    Raw detections are cached to `cache_path` (pickle); if the cache exists,
    detection is skipped and only the AP computation is re-run.

    Args:
        check_point: path to a checkpoint with a 'model_state_dict' entry.
        cache_path: pickle file caching the per-class detection results.
        class_agnostic: True if the regressor predicts one box per RoI.
        ovthresh: IoU threshold for a detection to count as a true positive.
        use_07_metric: use the 11-point VOC2007 AP approximation.
    """
    ind_class = {v: k for k, v in cfg.class_to_ind.items()}
    class_result_dic = {k: []
                        for k in cfg.class_to_ind.keys()
                        }  # store every class result
    imagenames = []
    if not os.path.exists(cache_path):
        test_set = PASCAL_VOC(cfg.testset_root_path, 'test')
        # NOTE(review): shuffle is irrelevant here (results are keyed by
        # image name), kept as in the original.
        dataloader = DataLoader(test_set,
                                batch_size=cfg.batch_size,
                                shuffle=True,
                                num_workers=4)
        # BUG FIX: "cuda: 0" (with a space) is not a valid torch device string.
        device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        fasterRCNN = resnet(cfg.backbone,
                            is_training=False,
                            pretrained=False,
                            class_agnostic=class_agnostic)
        fasterRCNN.create_architecture()
        print("load checkpoint %s" % (check_point))
        checkpoint = torch.load(check_point)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
        print('load model successfully!')
        fasterRCNN.eval()
        fasterRCNN.to(device)
        # Reusable input buffers, filled in place per batch.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        gt_boxes = torch.FloatTensor(1)
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        gt_boxes = gt_boxes.cuda()
        # Detect on every test image and accumulate per-class results.
        for batch_data in tqdm(dataloader):
            with torch.no_grad():
                im_data.resize_(batch_data['image'].size()).copy_(
                    batch_data['image'])
                gt_boxes.resize_(batch_data['gt_boxes'].size()).copy_(
                    batch_data['gt_boxes'])
                im_info.resize_(batch_data['im_info'].size()).copy_(
                    batch_data['im_info'])
                # NOTE(review): only imname[0] is used — assumes
                # cfg.batch_size == 1; confirm before raising it.
                image_name = os.path.basename(
                    batch_data['imname'][0]).split('.')[0]
                imagenames.append(image_name)
                rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(
                    im_data, gt_boxes)
                scores = cls_prob.data
                boxes = rois.data[:, :, 1:5]
                box_deltas = bbox_pred.data
                if cfg.bbox_normalize_targets_precomputed:
                    # Undo the bbox-target normalization used in training.
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.bbox_normalize_std).cuda() \
                        + torch.FloatTensor(cfg.bbox_normalize_means).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info, 1)
                # Map back to original image coordinates via the resize scale.
                pred_boxes = pred_boxes / batch_data['im_info'][0, 2]
                scores = scores.squeeze()
                pred_boxes = pred_boxes.squeeze()
                for j in range(1, len(cfg.class_to_ind)):
                    inds = torch.nonzero(scores[:, j] > 0).view(-1)
                    if inds.numel() > 0:
                        cls_scores = scores[:, j][inds]
                        _, order = torch.sort(cls_scores, 0, True)
                        if class_agnostic:
                            cls_boxes = pred_boxes[inds, :]
                        else:
                            cls_boxes = pred_boxes[inds][:,
                                                         j * 4:(j + 1) * 4]
                        # BUG FIX: the original indexed `pred_boxes[order]`,
                        # discarding the per-class slice `cls_boxes` computed
                        # just above and feeding the wrong boxes to NMS
                        # (compare with inference()).
                        cls_dets = cls_boxes[order]
                        cls_scores = cls_scores[order]
                        keep = nms(cls_dets, cls_scores,
                                   cfg.test_nms_threshold)
                        cls_dets = cls_dets[keep.view(
                            -1).long()]  # boxes kept for this class
                        cls_scores = cls_scores[keep.view(-1).long()]
                        for score, bbox in zip(cls_scores, cls_dets):
                            class_result_dic[ind_class[j]].append({
                                'image_name':
                                image_name,
                                'score':
                                score,
                                'bbox':
                                [bbox[0], bbox[1], bbox[2], bbox[3]]
                            })
        print('writing result cache ......')
        with open(cache_path, 'wb') as fp:
            pickle.dump(class_result_dic, fp)
    else:
        # Cache hit: recover the image list and cached detections.
        with open(
                os.path.join(cfg.testset_root_path, 'ImageSets', 'Main',
                             'test.txt')) as fp:
            for line in fp:
                imagenames.append(line.strip())
        with open(cache_path, 'rb') as fp:
            class_result_dic = pickle.load(fp)
    print('compute mAP... ')
    # Parse the ground-truth annotations for every test image.
    recs = {}
    for i, imagename in enumerate(imagenames):
        recs[imagename] = parse_rec(
            os.path.join(cfg.testset_root_path, 'Annotations',
                         imagename + '.xml'))
    # Standard VOC evaluation: for each class, sort detections by score,
    # greedily match them to ground truth at IoU >= ovthresh, then compute AP.
    mAP = 0
    for classname in cfg.class_to_ind.keys():
        if classname == 'BG':
            continue
        print(classname, end=' ')
        class_recs = {}
        npos = 0
        for imagename in imagenames:
            R = [obj for obj in recs[imagename] if obj['name'] == classname]
            bbox = np.array([x['bbox'] for x in R])
            # BUG FIX: np.bool was removed in NumPy 1.24; use builtin bool.
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
            det = [False] * len(R)
            npos = npos + sum(~difficult)
            class_recs[imagename] = {
                'bbox': bbox,
                'difficult': difficult,
                'det': det
            }
        class_result = class_result_dic[classname]
        image_ids = [r['image_name'] for r in class_result]
        confidence = np.array([float(r['score']) for r in class_result])
        BB = np.array([r['bbox'] for r in class_result])
        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        # ROBUSTNESS: with zero detections BB is a 1-D empty array and
        # BB[sorted_ind, :] raises IndexError — skip the reorder then.
        if BB.size > 0:
            BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]
        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih
                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)
            if ovmax > ovthresh:
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        # duplicate detection of an already-matched GT box
                        fp[d] = 1.
            else:
                fp[d] = 1.
        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        # ROBUSTNESS: guard npos == 0 (class absent from the test set) so
        # recall stays finite instead of inf/nan.
        rec = tp / np.maximum(float(npos), np.finfo(np.float64).eps)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print(ap)
        mAP += ap
    mAP = mAP / (len(cfg.class_to_ind) - 1)
    print('mAP:', mAP)