def test():
    """Run the Faster R-CNN detector on a demo image and save the result.

    Loads pretrained VGG weights from a hard-coded path, detects objects in
    demo/004545.jpg with a 0.3 score threshold, draws boxes + labels and
    writes the visualization to demo/out.jpg.
    """
    import os
    im_file = 'demo/004545.jpg'
    image = cv2.imread(im_file)
    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    detector.cuda()
    # Bug fix: inference must run in eval mode so Dropout/BatchNorm behave
    # deterministically (the other loaders in this file already do this).
    detector.eval()
    print('load model successfully!')
    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')
    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.3)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))
    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # secondary score filter (same threshold as detect(), kept for safety)
        if scores[i] < 0.3:
            continue
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Run the pretrained VGG Faster R-CNN on a demo image and write demo/out.jpg."""
    import os

    im_file = 'demo/00001.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'

    # Build the detector, load the checkpoint and switch to inference mode.
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    # Draw every returned detection with its class label and confidence.
    im2show = np.copy(image)
    for det, score, cls_name in zip(dets, scores, classes):
        box = tuple(int(v) for v in det)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        caption = '%s: %.3f' % (cls_name, score)
        cv2.putText(im2show, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def load_model(model_file_path):
    """Build a FasterRCNN, load weights from *model_file_path*, and return it
    on the GPU in evaluation mode."""
    net = FasterRCNN()
    network.load_net(model_file_path, net)
    net.cuda()
    net.eval()
    print('load model successfully!')
    return net
def build_extractor(model_file, classes=None):
    """Construct a FasterRCNN feature extractor with pretrained weights.

    classes: optional class list forwarded to the FasterRCNN constructor;
    when None the default constructor is used.
    """
    extractor = FasterRCNN() if classes is None else FasterRCNN(classes)
    extractor.cuda()
    extractor.eval()
    network.load_net(model_file, extractor)
    print('load model successfully!')
    return extractor
def test():
    """Detect objects in a demo image and display them with matplotlib.

    Loads a trained Faster R-CNN checkpoint, runs detection at a 0.7 score
    threshold, then renders boxes and class/score labels on the image.
    """
    import os
    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)
    # model_file = './VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')
    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))
    # Fix: removed the unused `im2show = np.copy(image)` buffer (the plot is
    # drawn from the re-read image below) and the stray debug print('aa').
    img = mpimg.imread(im_file)
    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(img)
    # Create a Rectangle patch (plus label text) for each detection
    for i, det in enumerate(dets):
        w = det[2] - det[0]
        h = det[3] - det[1]
        rect = patches.Rectangle(det[0:2], w, h, linewidth=1, edgecolor='r', facecolor='none')
        # text
        plt.text(det[0], det[1], '%s: %.3f' % (classes[i], scores[i]))
        # Add the patch to the Axes
        ax.add_patch(rect)
    plt.show()
def test(visualize=False):
    """Run a trained Faster R-CNN on one cervix image and save the rendering.

    visualize: when True, additionally show a downscaled copy in an OpenCV window.
    """
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    im_name = im_file.split('/')[-1]
    image = cv2.imread(im_file)

    # model_file = 'models/VGGnet_fast_rcnn_iter_70000.h5'
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    # Output directory is named after the checkpoint file (demo/<expm>/).
    expm = model_file.split('/')[-1].split('.')[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # eval() switches Dropout/BatchNorm to inference behaviour; train() would undo it.
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    stopwatch = Timer()
    stopwatch.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = stopwatch.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for det, score, cls_name in zip(dets, scores, classes):
        box = tuple(int(v) for v in det)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 4)
        cv2.putText(im2show, '%s: %.3f' % (cls_name, score), (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', expm, im_name), im2show)

    if visualize:
        im2show = cv2.resize(im2show, None, None, fx=0.15, fy=0.15, interpolation=cv2.INTER_LINEAR)
        cv2.imshow('demo', im2show)
        cv2.waitKey(0)
def analysis_video(self, result_dir):
    """Run detection over every frame in self.image_list and collect results.

    Writes one annotated JPEG per frame into *result_dir*, updates the UI
    table/status bar, and returns a dict describing all detected objects.
    """
    self.statusbar_stringvar.set('Analysis..Please wait..')
    model_file = 'model.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    info_dict = {}
    info_dict['pictures'] = []
    for index in range(len(self.image_list)):
        accuracy = 0.
        pic_info = {}
        pic_info['objects'] = []
        dets, scores, classes = detector.detect(self.image_list[index], 0.8)
        im2show = np.copy(self.image_list[index])
        for i, det in enumerate(dets):
            object_info = {}
            det = tuple(int(x) for x in det)
            cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
            cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15),
                        cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
            accuracy += scores[i]
            # object info initial
            object_info['name'] = classes[i]
            object_info['accuracy'] = scores[i]
            object_info['bbox'] = det
            pic_info['objects'].append(object_info)
        # pic_info initial
        pic_info['filename'] = os.path.basename(self.video_path).split('.')[0] + '_' + str(index + 1) + '.jpg'
        pic_info['size'] = im2show.shape
        info_dict['pictures'].append(pic_info)
        cv2.imwrite(os.path.join(result_dir, pic_info['filename']), im2show)
        # Bug fix: a frame with no detections used to divide by zero here;
        # report 0 accuracy for such frames instead of crashing.
        mean_accuracy = accuracy / len(classes) if len(classes) else 0.
        self.view_table.update(index + 1, **{
            'name': pic_info['filename'],
            'accuracy': mean_accuracy,
            'state': 'yes'
        })
    self.statusbar_stringvar.set('Analysis done!')
    return info_dict
def main():
    """Evaluate a pretrained Faster R-CNN detector on the small visual_genome test split."""
    global args
    # Fix: use print() calls — Python 2 print statements break under Python 3,
    # and the rest of this file already uses the function form.
    print("Loading training set and testing set...")
    # train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    object_classes = test_set.object_classes
    print("Done.")
    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
    net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors,
                     n_classes=len(object_classes),
                     model=args.base_model)
    network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net)
    # network.load_net('./output/detection/RPN_object1_best.h5', net)
    # network.set_trainable(net.features, requires_grad=False)
    net.cuda()
    # NOTE(review): net.eval() is not called before testing — confirm that
    # test() sets evaluation mode itself.
    # Testing
    recall = test(test_loader, net)
    print('Recall: '
          'object: {recall: .3f}%'.format(recall=recall*100))
def train():
    """Train Faster R-CNN on PASCAL VOC.

    Reads hyper-parameters from the CLI and cfg, builds the dataset and the
    model, runs SGD for args.max_epochs epochs, logs to tensorboardX when
    enabled, and checkpoints every args.save_interval epochs.
    """
    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS

    cfg.USE_GPU_NMS = True if args.use_cuda else False

    assert args.batch_size == 1, 'Only support single batch'

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA

    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    # map the dataset option to imdb names (train + held-out eval split)
    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'
    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError

    # pick the Caffe-converted ResNet backbone checkpoint
    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError

    args.pretrained_model = os.path.join('data', 'pretrained', fname)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # dataset_cachefile = os.path.join(output_dir, 'dataset.pickle')
    # if not os.path.exists(dataset_cachefile):
    #     imdb, roidb = combined_roidb(args.imdb_name)
    #     cache = [imdb, roidb]
    #     with open(dataset_cachefile, 'wb') as f:
    #         pickle.dump(cache, f)
    #     print('save dataset cache')
    # else:
    #     with open(dataset_cachefile, 'rb') as f:
    #         cache = pickle.load(f)
    #     imdb, roidb = cache[0], cache[1]
    #     print('loaded dataset from cache')

    imdb, roidb = combined_roidb(args.imdb_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)

    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')

    # if cfg.PRETRAINED_RPN:
    #     rpn_model_path = 'output/rpn.pth'
    #     model.load_state_dict(torch.load(rpn_model_path)['model'])
    #     print('loaded rpn!')

    # optimizer
    # Biases get a doubled learning rate and (optionally) no weight decay,
    # following the original Caffe Faster R-CNN solver settings.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and weight_decay or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    optimizer = SGD(params, momentum=momentum)

    if args.use_cuda:
        model = model.cuda()

    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        # running statistics, reset every display interval
        loss_temp = 0
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # step the learning rate at the configured decay epochs
        if epoch in args.decay_lrs:
            lr = lr * gamma
            adjust_learning_rate(optimizer, lr)
            print('adjust learning rate to {}'.format(lr))

        for step in range(iters_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            output = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, \
                rcnn_cls_loss, rcnn_box_loss, \
                rpn_cls_loss, rpn_box_loss, _train_info = output

            # total loss = RCNN-head losses + RPN losses
            loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() +\
                rpn_cls_loss.mean() + rpn_box_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                rcnn_tp += _train_info['rcnn_tp']
                rcnn_tn += _train_info['rcnn_tn']
                rcnn_fg += _train_info['rcnn_num_fg']
                rcnn_bg += _train_info['rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval
                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()
                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f" \
                      % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v, rcnn_box_loss_v))
                if cfg.VERBOSE:
                    # NOTE(review): these divisions assume at least one fg/bg
                    # sample was seen during the interval — verify upstream.
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f'
                          % (rpn_fg, rpn_bg, float(rpn_tp) / rpn_fg, float(rpn_tn) / rpn_bg))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f'
                          % (rcnn_fg, rcnn_bg, float(rcnn_tp) / rcnn_fg, float(rcnn_tn) / rcnn_bg))

                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v', rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v', rcnn_box_loss_v, n_iter)
                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc', float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc', float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar('rcnn/fg_acc', float(rcnn_tp) / rcnn_fg, n_iter)
                        writer.add_scalar('rcnn/bg_acc', float(rcnn_tn) / rcnn_bg, n_iter)

                # reset the running statistics for the next display interval
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()

        # periodic checkpoint with enough state to resume (model, epoch, lr)
        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
def train():
    """Train Faster R-CNN on PASCAL VOC using CLI hyper-parameters.

    Builds the roidb dataset, constructs the model/optimizer (doubled LR and
    optional zero weight-decay for biases), trains for args.epochs epochs,
    logs to tensorboardX when enabled, and saves a checkpoint every
    args.save_interval epochs.
    """
    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs
    momentum = args.momentum
    weight_decay = args.weight_decay
    bais_decay = args.bais_decay  # (sic) switch for applying decay to biases
    gamma = args.gamma

    cfg.USE_GPU_NMS = True if args.use_gpu else False

    if args.use_tfboard:
        writer = SummaryWriter()

    # load data
    print('load data')
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError

    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)

    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)

    # Per-parameter options: biases get a doubled learning rate and
    # (optionally) zero weight decay, as in the original Caffe solver.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': bais_decay and weight_decay or 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    if args.use_gpu:
        model = model.cuda()

    model.train()

    # define optimizer
    optimizer = SGD(params, momentum=momentum)

    # training
    print('start training...')
    for epoch in range(args.epochs):
        start_time = time.time()
        train_data_iter = iter(train_dataloader)
        # running statistics, reset every display interval
        temp_loss = 0
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0

        # step the learning rate at the configured decay epochs
        if epoch in decay_lrs:
            lr = lr * gamma
            adjust_lr(optimizer, lr)
            print('adjusting learning rate to {}'.format(lr))

        for step in range(iter_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_gpu:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            outputs = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
                rpn_cls_loss, rpn_reg_loss, _train_info = outputs

            # total loss = detection-head losses + RPN losses
            loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                rpn_cls_loss.mean() + rpn_reg_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            temp_loss += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                faster_rcnn_tp += _train_info['faster_rcnn_tp']
                faster_rcnn_tn += _train_info['faster_rcnn_tn']
                faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                faster_rcnn_bg += _train_info['faster_rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                end_time = time.time()
                temp_loss /= args.display_interval
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()

                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f'
                      % (epoch, step + 1, iter_per_epoch, temp_loss, end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m, faster_rcnn_reg_loss_m))

                if args.use_tfboard:
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m', faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m', faster_rcnn_reg_loss_m, n_iter)
                    if cfg.VERBOSE:
                        # NOTE(review): assumes the fg/bg counters are non-zero
                        # at display time — verify upstream.
                        writer.add_scalar('rpn/fg_acc', float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc', float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc', float(faster_rcnn_tp) / faster_rcnn_fg, n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc', float(faster_rcnn_tn) / faster_rcnn_bg, n_iter)

                # reset the interval statistics
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()

        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

        # periodic checkpoint with enough state to resume (model, epoch, lr)
        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
class FasterRCNN:
    """Thin inference wrapper around the project's FasterRCNNModel.

    Downloads pretrained building-detection weights on first use and exposes
    single-image and dataset-wide prediction helpers.
    """

    def __init__(self, weights=None):
        """Load the model; when *weights* is None, fetch the released checkpoint."""
        if weights is None:
            if not os.path.exists('weights'):
                os.mkdir('weights')
            download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
            if not os.path.exists('weights/faster-rcnn'):
                print('Downloading weights for faster-rcnn')
                if not os.path.exists(os.path.join('weights/faster-rcnn.zip')):
                    check_output([
                        'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                    ])
                print('Unzipping...')
                check_output(
                    ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])
            # Fix: use a context manager so the description file is closed.
            with open('weights/faster-rcnn/description.json') as f:
                description = json.load(f)
            weights = os.path.join('weights/faster-rcnn', description['weights'])
        print('Building model...')
        # NOTE(review): '__backround__' (sic) is kept as-is — it may have to
        # match the label set the released checkpoint was trained with;
        # confirm before correcting the spelling.
        self.model = FasterRCNNModel(classes=['__backround__', 'building'], debug=False)
        network.load_net(weights, self.model)
        self.model.cuda()
        self.model.eval()

    def close_session(self):
        """No-op; kept for interface parity with session-based backends."""
        pass

    def predict_image(self, image, threshold, eval_mode=False):
        """
        Infer buildings for a single image.

        Inputs:
            image :: n x m x 3 ndarray (RGB) or a path to an image file
            threshold :: minimum score a detection must reach to be returned
            eval_mode :: when True, also return all detections and the runtime
        """
        # Fix: isinstance() instead of a type() identity comparison.
        if isinstance(image, str):
            image = cv2.imread(image)
        else:
            image = image[:, :, (2, 1, 0)]  # RGB -> BGR
        im_data, im_scales = self.model.get_image_blob(image)
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)

        t0 = time.time()
        cls_prob, bbox_pred, rois = self.model(im_data, im_info)
        runtime = time.time() - t0

        scores = cls_prob.data.cpu().numpy()
        boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data.cpu().numpy()
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            pred_boxes = clip_boxes(pred_boxes, image.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        current = np.concatenate(
            [
                pred_boxes[:, 4:8],  # (skip the background class)
                np.expand_dims(scores[:, 1], 1)
            ],
            axis=1)
        # non-maximum suppression at IoU 0.3
        suppressed = current[py_cpu_nms(current.astype(np.float32), 0.3)]
        suppressed = pandas.DataFrame(
            suppressed, columns=['x1', 'y1', 'x2', 'y2', 'score'])
        if eval_mode:
            return suppressed[
                suppressed['score'] >= threshold], suppressed, runtime
        else:
            return suppressed[suppressed['score'] >= threshold]

    def predict_all(self, test_boxes_file, threshold, data_dir=None):
        """Yield (thresholded detections, all detections, annotation) per image.

        test_boxes_file: JSON list of annotations with an 'image_path' key.
        data_dir: image root; defaults to the annotation file's directory.
        """
        # Fix: context manager so the annotation file is closed.
        with open(test_boxes_file) as f:
            test_boxes = json.load(f)
        if data_dir is None:
            data_dir = os.path.dirname(test_boxes_file)
        total_time = 0.0
        for i, anno in enumerate(test_boxes):
            orig_img = cv2.imread(
                '%s/%s' % (data_dir, anno['image_path']))[:, :, (2, 1, 0)]
            # Fix: the loop variable used to shadow the `time` module, and
            # total_time was never accumulated.
            pred, all_rects, elapsed = self.predict_image(orig_img, threshold,
                                                          eval_mode=True)
            total_time += elapsed
            pred['image_id'] = i
            all_rects['image_id'] = i
            yield pred, all_rects, test_boxes[i]
def main():
    """Train Faster R-CNN on visual_genome, evaluating and checkpointing each epoch."""
    global args
    # Fix: print() calls instead of Python 2 print statements.
    print("Loading training set and testing set...")
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome(args.dataset_option, 'test')
    object_classes = test_set.object_classes
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)

    net = FasterRCNN(nhidden=args.mps_feature_len,
                     use_kmeans_anchors=args.use_kmeans_anchors,
                     n_classes=len(object_classes),
                     model=args.base_model)
    if args.resume_model:
        print('Resume training from: {}'.format(args.resume_model))
        # (removed the old `if len(args.resume_model) == 0: raise` guard —
        # an empty string is falsy and could never reach this branch)
        network.load_net(args.detection_model, net)
        # optimizer = torch.optim.SGD([
        #     {'params': list(net.parameters())},
        # ], lr=args.lr, momentum=args.momentum, weight_decay=0.0005)
    else:
        print('Training from scratch...Initializing network...')
    optimizer = torch.optim.SGD(list(net.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=0.0005)
    # network.set_trainable(net.features, requires_grad=True)
    net.cuda()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    best_map = 0.0
    for epoch in range(0, args.max_epoch):
        # Training
        train(train_loader, net, optimizer, epoch)

        # update learning rate: divide by 5 every args.step_size epochs
        if epoch % args.step_size == args.step_size - 1:
            args.clip_gradient = False
            args.lr /= 5
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        save_name = os.path.join(
            args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch))
        network.save_net(save_name, net)
        print('save model: {}'.format(save_name))

        try:
            # Testing
            map = evaluate(test_loader, net, object_classes)
            # Fix: str.format has no %-escaping, so '%%' printed a literal
            # double percent sign; a single '%' is what was intended.
            print(
                'Epoch[{epoch:d}]: '
                'Recall: '
                'object: {map: .3f}% (Best: {best_map: .3f}%)'.format(
                    epoch=epoch, map=map * 100, best_map=best_map * 100))
            if map > best_map:
                best_map = map
                # Fix: dropped the stray `epoch` argument — the format string
                # has a single placeholder.
                save_name = os.path.join(
                    args.output_dir, '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
        except Exception as e:
            # Fix: the bare `except: continue` also swallowed KeyboardInterrupt
            # and hid real bugs; log the failure and move on instead.
            print('Evaluation failed at epoch {}: {}'.format(epoch, e))
            continue
def test():
    """Evaluate a trained Faster R-CNN checkpoint on a VOC test split.

    Runs detection on every test image, applies per-class NMS and an optional
    per-image detection cap, dumps all boxes to detections.pkl and invokes the
    imdb's official evaluation.
    """
    args = parse_args()

    # prepare data
    print('load data')
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb = combined_roidb(dataset_name)
    test_dataset = RoiDataset(roidb)
    test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
    test_data_iter = iter(test_dataloader)

    # load model
    model = FasterRCNN(backbone=args.backbone)
    model_name = '0712_faster_rcnn101_epoch_{}.pth'.format(args.check_epoch)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    model_path = os.path.join(args.output_dir, model_name)
    model.load_state_dict(torch.load(model_path)['model'])
    if args.use_gpu:
        model = model.cuda()
    model.eval()

    num_images = len(imdb.image_index)
    det_file_path = os.path.join(args.output_dir, 'detections.pkl')
    # all_boxes[class][image] = N x 5 array of detections [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    torch.set_grad_enabled(False)
    # Fix: time the whole run — start_time used to be reset inside the loop,
    # so the final "test time" only covered the last image.
    start_time = time.time()
    for i in range(num_images):
        im_data, gt_boxes, im_info = next(test_data_iter)
        if args.use_gpu:
            im_data = im_data.cuda()
            gt_boxes = gt_boxes.cuda()
            im_info = im_info.cuda()
        im_data_variable = Variable(im_data)

        det_tic = time.time()
        rois, faster_rcnn_cls_prob, faster_rcnn_reg, _, _, _, _, _ = model(
            im_data_variable, gt_boxes, im_info)
        scores = faster_rcnn_cls_prob.data
        boxes = rois.data[:, 1:]
        boxes_deltas = faster_rcnn_reg.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # un-normalize the regression targets
            boxes_deltas = boxes_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            boxes_deltas = boxes_deltas.view(-1, 4 * imdb.num_classes)
        pred_boxes = bbox_transform_inv_cls(boxes, boxes_deltas)
        pred_boxes = clip_boxes_cls(pred_boxes, im_info[0])
        # scale the boxes back to the original image size
        pred_boxes /= im_info[0][2].item()
        det_toc = time.time()
        # Fix: operands were reversed (det_tic - det_toc gave negative times).
        detect_time = det_toc - det_tic

        nms_tic = time.time()
        if args.vis:
            im_show = Image.open(imdb.image_path_at(i))
        # per-class score thresholding + NMS
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > args.thresh).view(-1)
            if inds.numel() > 0:
                cls_score = scores[:, j][inds]
                _, order = torch.sort(cls_score, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_score.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, 0.3)
                cls_dets = cls_dets[keep.view(-1).long()]
                if args.vis:
                    cls_name_dets = np.repeat(j, cls_dets.size(0))
                    im_show = draw_detection_boxes(im_show, cls_dets.cpu().numpy(),
                                                   cls_name_dets, imdb.classes, 0.5)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # keep only the top max_per_image detections over all classes
        if args.max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > args.max_per_image:
                image_thresh = np.sort(image_scores)[-args.max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if args.vis:
            plt.imshow(im_show)
            plt.show()

        nms_toc = time.time()
        # Fix: operands were reversed here as well.
        nms_time = nms_toc - nms_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file_path, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, args.output_dir)
    end_time = time.time()
    print("test time: %0.4fs" % (end_time - start_time))
def test():
    """Run a trained CLASP person/bin detector over a directory of frames.

    For every .jpg under imgPath, runs detection at a 0.7 score threshold,
    collects per-class [x1, y1, x2, y2, frame_index] rows keyed by class name,
    and saves the accumulated results to a MATLAB .mat file.
    """
    import os
    # im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    matName = 'exp5bC9.mat'
    model_file = '/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/model/CLASP_m_rotation_withNoRot_More/faster_rcnn_20000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    #CLASP_class = np.asarray(['__background__', # always index 0
    #                          'person', 'bin'])
    UCF_class = np.asarray(['__background__', 'person', 'bin'])
    # class labels without the background entry
    label = UCF_class[1:]  #CLASP_class[1:]
    detector = FasterRCNN(UCF_class)  #CLASP_class
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    #filename = "/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/video/07212017_EXPERIMENT_9A_Aug7/mp4s/Camera_9.mp4"
    #vid = imageio.get_reader(filename, 'ffmpeg')
    imgPath = "/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/exp5bC9/exp5bC9/"  #"/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/C11_50_selected/"
    imgType = '*.jpg'
    image_list = []
    # collect all frame paths (glob order is filesystem-dependent)
    for filename in glob.glob(imgPath + imgType):  # assuming jpg
        #im = Image.open(filename)
        image_list.append(filename)
        #im.close()
    # token that precedes the frame number in each filename
    spliter = 'Frame'  #'Frame'
    # one accumulator row-array per class; starts as a single zero placeholder row
    result = {x: np.zeros([1, 5]) for x in label}
    for i, name in enumerate(image_list):
        ele = Image.open(name)
        image = np.asarray(ele)
        # NOTE(review): `str` shadows the builtin here (kept as-is; doc-only pass).
        # Parses the integer frame index out of the filename after 'Frame'.
        str = ele.filename
        str = str.split(spliter)[1].split('.')[0]
        ind = int(str)
        t = Timer()
        t.tic()
        dets, scores, classes = detector.detect(image, 0.7)
        runtime = t.toc()
        # append one [x1, y1, x2, y2, frame] row per detection; the first real
        # detection overwrites the zero placeholder row
        for j, label in enumerate(classes):
            tmp = np.empty([1, 5])
            tmp[0][0:4] = dets[j]
            tmp[0][4] = ind
            if result[label].max() == 0:
                result[label][0] = tmp
            else:
                result[label] = np.append(result[label], tmp, axis=0)
        print('Progress: {a:8.2f}%'.format(a=i * 100.0 / image_list.__len__()))
        print('total spend: {}s'.format(runtime))
        ele.close()
    sio.savemat(matName, result)  #result_9AC11_selected.mat
    #for im in enumerate(vid):
    #image = np.asarray(im)
    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
'''
def test():
    """Track the first-frame object through a DAVIS sequence with Faster R-CNN.

    For each sequence: take the ground-truth mask bbox from frame 0, classify
    it with the detector head, then for every later frame merge RPN proposals
    with the previous bbox (falling back to full detection when nothing
    overlaps), refine it by bbox regression, and save per-frame crops,
    visualizations, and the full list of boxes to ``crop.npy``.
    """
    # Set up dataloader
    data_loader = DAVIS_seq_dataloader(split='val')
    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('Load Faster R-CNN model successfully!')
    # unet_model = './model/vgg_unet_1e-4_500.h5'
    # unet = UNet()
    # network.load_net(unet_model, unet)
    # unet.cuda()
    # network.weights_normal_init(unet, dev=0.01)
    # unet.load_from_faster_rcnn_h5(h5py.File(model_file))
    # NOTE(review): criterion_bce and weight_decay are only used by the
    # commented-out U-Net fine-tuning code below; dead while that stays off.
    criterion_bce = torch.nn.BCELoss().cuda()
    weight_decay = 5e-5
    # optimizer = torch.optim.SGD(list(unet.parameters())[26:], lr=1e-4, weight_decay=weight_decay)
    # print('Load U-Net model successfully!')
    crop_set = []
    # Iterate over sequences
    for i in range(data_loader.num_seq):
        # Get the first frame info
        seq = data_loader.seq_list[data_loader.out_pointer]
        seq_len = data_loader.seq_len[seq]
        img_blobs, seg_blobs = data_loader.get_next_minibatch()
        img = img_blobs[0,:,:,:]
        im_data, im_scales = detector.get_image_blob(img)
        # NOTE(review): im_info is built from the FIRST frame only and reused
        # for every later frame in this sequence — confirm all frames share
        # the same size/scale, otherwise this is a latent bug.
        im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]], dtype=np.float32)
        # Get the category of the object in the first frame:
        # bbox() returns (row_min, row_max, col_min, col_max) of the mask.
        rmin, rmax, cmin, cmax = bbox(seg_blobs[0,:,:,0])
        features, rois = detector(im_data, im_info, rpn_only=True)
        # RoI rows are [batch_idx, x1, y1, x2, y2]; batch index 0 here.
        new_rois_np = np.array([[0, cmin, rmin, cmax, rmax]], dtype=np.float32)
        new_rois_t = torch.from_numpy(new_rois_np).cuda()
        new_rois = Variable(new_rois_t, requires_grad=False)
        # Run the mask bbox through the classification head to get its class.
        pooled_features = detector.roi_pool(features, new_rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = detector.fc6(x)
        x = detector.fc7(x)
        cls_score = detector.score_fc(x)
        cls_prob = F.softmax(cls_score)
        bbox_pred = detector.bbox_fc(x)
        cls_prob_np = cls_prob.cpu().data.numpy()
        bbox_pred_np = bbox_pred.cpu().data.numpy()
        cls_idx = cls_prob_np.argmax()   # predicted class of the tracked object
        cls_conf = cls_prob_np.max()
        # Overfit U-Net with the first frame
        # for i in range(100):
        #     unet.train()
        #     img_t = torch.from_numpy(img_blobs).permute(0,3,1,2).float().cuda()
        #     img_v = Variable(img_t, requires_grad=False)
        #     seg_t = torch.from_numpy(seg_blobs).permute(0,3,1,2).float().cuda()
        #     seg_v = Variable(seg_t, requires_grad=False)
        #     pred = unet(img_v)
        #     loss = criterion_bce(pred, seg_v)
        #     pred_view = pred.view(-1, 1)
        #     seg_view = seg_v.view(-1, 1)
        #     EPS = 1e-6
        #     loss = 0.6 * seg_view.mul(torch.log(pred_view+EPS)) + 0.4 * seg_view.mul(-1).add(1).mul(torch.log(1-pred+EPS))
        #     loss = -torch.mean(loss)
        #     loss_val = loss.data[0]
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     print('{}/100: {}'.format(i, loss_val))
        # unet.eval()
        # Merge region proposals overlapping with last frame proposal
        for j in range(1, seq_len):
            img_blobs, _ = data_loader.get_next_minibatch()
            img = img_blobs[0,:,:,:]
            im_data, im_scales = detector.get_image_blob(img)
            # 300 x 5, the first elements are useless here
            features, rois = detector(im_data, im_info, rpn_only=True)
            # merge_rois returns (None, None, None, None) when no proposal
            # overlaps the previous bbox at IoU >= thres.
            x1, y1, x2, y2 = merge_rois((rmin, rmax, cmin, cmax), rois.cpu().data.numpy(), thres=0.75)
            # Have overlapping proposals
            if x1 is not None:
                # Send to following layers to refine the bbox
                new_rois_np = np.array([[0, x1, y1, x2, y2]], dtype=np.float32)
                new_rois_t = torch.from_numpy(new_rois_np).cuda()
                new_rois = Variable(new_rois_t, requires_grad=False)
                pooled_features = detector.roi_pool(features, new_rois)
                x = pooled_features.view(pooled_features.size()[0], -1)
                x = detector.fc6(x)
                x = detector.fc7(x)
                cls_score = detector.score_fc(x)
                cls_prob = F.softmax(cls_score)
                bbox_pred = detector.bbox_fc(x)
                cls_prob_np = cls_prob.cpu().data.numpy()
                bbox_pred_np = bbox_pred.cpu().data.numpy()
                # Only regress bbox when confidence is greater than 0.8
                # and the winner is not background (index 0).
                if cls_prob_np.max() > 0.8 and cls_prob_np.argmax() != 0:
                    keep = cls_prob_np.argmax()
                    pred_boxes, scores, classes = detector.interpret_faster_rcnn(cls_prob, bbox_pred, new_rois, im_info, im_data.shape, 0.8)
                    # Standard Faster R-CNN bbox-regression decoding:
                    # deltas (dx, dy, dw, dh) for the kept class, applied to
                    # the merged box's center/size.
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    width = x2 - x1 + 1
                    height = y2 - y1 + 1
                    dx = bbox_pred_np[0,keep*4+0]
                    dy = bbox_pred_np[0,keep*4+1]
                    dw = bbox_pred_np[0,keep*4+2]
                    dh = bbox_pred_np[0,keep*4+3]
                    pred_x = dx * width + cx
                    pred_y = dy * height + cy
                    pred_w = np.exp(dw) * width
                    pred_h = np.exp(dh) * height
                    x1 = pred_x - pred_w / 2
                    x2 = pred_x + pred_w / 2
                    y1 = pred_y - pred_h / 2
                    y2 = pred_y + pred_h / 2
            # No overlapping proposals
            if x1 is None:
                # Using Faster R-CNN again to find potential objects
                dets, scores, classes = detector.detect(img, 0.6)
                # Cannot find any salient object: keep the previous bbox.
                if dets.shape[0] == 0:
                    x1, y1, x2, y2 = cmin, rmin, cmax, rmax
                else:
                    # Vectorized IoU of every detection vs the previous bbox.
                    x1 = dets[:,0]
                    y1 = dets[:,1]
                    x2 = dets[:,2]
                    y2 = dets[:,3]
                    pred_area = (x2 - x1 + 1) * (y2 - y1 + 1)
                    init_area = (cmax - cmin + 1) * (rmax - rmin + 1)
                    xx1 = np.maximum(x1, cmin)
                    xx2 = np.minimum(x2, cmax)
                    yy1 = np.maximum(y1, rmin)
                    yy2 = np.minimum(y2, rmax)
                    inter = (xx2 - xx1 + 1) * (yy2 - yy1 + 1)
                    ovr = inter / (pred_area + init_area - inter)
                    # If there is overlapping, choose the largest IoU bbox
                    try:
                        # NOTE(review): indexing [-1] on an empty array raises
                        # IndexError, which is what triggers the fallback; the
                        # bare except also hides any other failure.
                        ovr = ovr[ovr > 0.3]
                        ovr_idx = np.argsort(ovr)[-1]
                        x1 = dets[ovr_idx,0]
                        y1 = dets[ovr_idx,1]
                        x2 = dets[ovr_idx,2]
                        y2 = dets[ovr_idx,3]
                    # Else, choose the highest objectness score one
                    except:
                        if cls_idx == 0:
                            temp_idx = scores.argmax()
                            x1 = dets[temp_idx,0]
                            y1 = dets[temp_idx,1]
                            x2 = dets[temp_idx,2]
                            y2 = dets[temp_idx,3]
                        else:
                            # Pick the detection of the same class closest to
                            # the previous bbox center.
                            cx = (x1 + x2) / 2
                            cy = (y1 + y2) / 2
                            cc = (cmin + cmax) / 2
                            cr = (rmin + rmax) / 2
                            dist = np.sqrt(np.square(cx-cc) + np.square(cy-cr))
                            dist_idx = np.argsort(dist)
                            for di in dist_idx:
                                if classes[di] == _CLASSES[cls_idx]:
                                    x1 = dets[di,0]
                                    y1 = dets[di,1]
                                    x2 = dets[di,2]
                                    y2 = dets[di,3]
            # Crop the region and send it to U-Net.
            # NOTE(review): the except arm handles x1..y2 still being arrays
            # (TypeError on int()); the bare except is broader than needed.
            try:
                x1 = int(max(x1, 0))
                x2 = int(min(x2, im_data.shape[2]))
                y1 = int(max(y1, 0))
                y2 = int(min(y2, im_data.shape[1]))
            except:
                x1 = int(max(x1[0], 0))
                x2 = int(min(x2[0], im_data.shape[2]))
                y1 = int(max(y1[0], 0))
                y2 = int(min(y2[0], im_data.shape[1]))
            # MEAN_PIXEL = np.array([103.939, 116.779, 123.68])
            # crop = img_blobs[:, y1:y2+1, x1:x2+1, :] - MEAN_PIXEL
            # crop = img_blobs[:,:,:,:] - MEAN_PIXEL
            # crop_v = Variable(torch.from_numpy(crop).permute(0, 3, 1, 2).cuda(), requires_grad=False)
            # pred = unet(crop_v)
            # pred_np = pred.cpu().data.numpy()[0,0,:,:]
            # pred_np[pred_np < 0.5] = 0
            # pred_np[pred_np >= 0.5] = 1
            # pred_np = pred_np * 255
            # res = pred_np.astype(int)
            # cv2.imwrite('test.png', res)
            # Degenerate (<=1px) box: drop into the debugger for inspection.
            if y2 - y1 <= 1 or x2 - x1 <= 1:
                ipdb.set_trace()
            cv2.imwrite(os.path.join('demo', 'crop_{}_{}.png'.format(i, j)), img[y1:y2+1,x1:x2+1,:])
            # Carry the refined box over as the "previous" bbox for frame j+1.
            rmin = y1
            rmax = y2
            cmin = x1
            cmax = x2
            im2show = np.copy(img)
            cv2.rectangle(im2show, (int(x1),int(y1)), (int(x2),int(y2)), (0, 255, 0), 2)
            cv2.imwrite(os.path.join('demo', '{}_{}.jpg'.format(i, j)), im2show)
            temp = [i, j, x1, y1, x2, y2]
            crop_set.append(temp)
    # Save all [seq_idx, frame_idx, x1, y1, x2, y2] rows.
    crop_set = np.array(crop_set)
    np.save('crop', crop_set)
thickness=1) im_name = os.path.basename(image_file) print(os.path.join('demo/det_results', im_name)) cv2.imwrite(os.path.join('demo/det_results', im_name), im2show) #cv2.imshow('demo', im2show) #cv2.waitKey(0) def folder_test(net, folder): txt_file = folder + 'JPEGImages/file_name.txt' with open(txt_file) as f: for line in f: img_path = folder + 'JPEGImages/' + line.strip('\n') + '.JPG' anno_path = folder + 'Annotations/' + line.strip('\n') + '.xml' image_test(net, img_path, anno_path) if __name__ == '__main__': model_file = 'models/saved_model3/faster_rcnn_100000.h5' detector = FasterRCNN() network.load_net(model_file, detector) detector.cuda() detector.eval() print('load model successfully!') #image_file = 'demo/000001.JPG' #image_test(detector, image_file, None) folder = '/data/jmtian/PlateData/PVW_WRM_CUT/' folder_test(detector, folder)
print("Loading image embedding model...") if args.image_embedding_model_type == "resnet": im_emb_model = ResNet50() elif args.image_embedding_model_type == "vgg": im_emb_model = VGG16() else: print("--image_embedding_model_type must be either resnet or vgg") sys.exit(0) print("Done!") # create Faster-RCNN model for state featurization print("Loading Fast-RCNN...") model_file = 'VGGnet_fast_rcnn_iter_70000.h5' model_frcnn = FasterRCNN() network.load_net(model_file, model_frcnn) model_frcnn.cuda() model_frcnn.eval() print("Done!") # create DQN's for the next object, predicates, and attributes print("Creating DQN models...") DQN_next_object_main = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_next_object_target = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_predicate_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_predicate_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_attribute_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) DQN_attribute_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) print("Done!") # create shared optimizer # The paper says this optimizer is shared. Right now
momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = VisualGenome(split=0, num_im=50) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_net(pretrained_model, net) # network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy') net.cuda() net.train() params = list(net.parameters()) print("Params are {}".format( '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()])) ) # optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.mkdir(output_dir) # tensorboad use_tensorboard = use_tensorboard and CrayonClient is not None
with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb_0.evaluate_detections(all_boxes, output_dir) if __name__ == '__main__': imdb_0 = get_imdb(imdb_test_name_0) imdb_0.competition_mode(on=True) net_0 = FasterRCNN(classes=imdb_0.classes, debug=False) network.load_net(trained_model_0, net_0) print('load model 0 successfully!') net_0.cuda() net_0.eval() imdb_1 = get_imdb(imdb_test_name_1) imdb_1.competition_mode(on=True) net_1 = FasterRCNN(classes=imdb_1.classes, debug=False) network.load_net(trained_model_1, net_1) print('load model 1 successfully!') net_1.cuda() net_1.eval() # evaluation test_net_u(net_0, net_1, imdb_0, imdb_1,