def train(**kwargs):
    """
    Train a FasterRCNN (VGG16) detector, or only evaluate it when
    opt.validate_only is set.

    KWARGS: option overrides; forwarded unchanged to opt._parse().

    When opt.load_path is set, the trainer state is loaded from that file.
    If the loaded state carries an 'epoch' entry, training continues with the
    following epoch; if it carries a 'best_map' entry, that value is used as
    the bar a new evaluation has to beat before the "best" checkpoint is
    written again (otherwise the bar starts at 0).

    RETURNS: None.
    """
    opt._parse(kwargs)

    print('load data')
    dataset = Dataset(opt)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count(),
                                  ratios=[1],
                                  anchor_scales=[1])
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()

    old_state = None
    if opt.load_path:
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=num_eval_images)
        print('Evaluation finished, obtained {} using {} out of {} images'.
              format(eval_result, num_eval_images, len(testset)))
        return

    if old_state and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'.
              format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    # Carry the best score over when resuming; otherwise the first epoch after
    # a restart would always overwrite the best checkpoint.
    if old_state and 'best_map' in old_state.keys():
        best_map = old_state['best_map']
    else:
        best_map = 0

    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # Step decay: multiply by lr_decay once every epoch_decay epochs.
        lr_ = opt.lr * (opt.lr_decay**(epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(lr_)
        print('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale)
            if opt.use_cuda:
                img = img.cuda().float()
                bbox = bbox_.float().cuda()
                label = label_.float().cuda()
            else:
                img, bbox, label = img.float(), bbox_.float(), label_.float()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            losses = trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Creating the file named by opt.debug_file drops a running
                # training job into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Only the total loss is plotted; the ground-truth/prediction
                # image plots and confusion-matrix plots are disabled here.
                trainer.vis.plot('train_total_loss', losses[-1].tolist())

            if global_step % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print("Snapshotted to {}".format(snapshot_path))

        # Drop the references to the last batch before evaluating so the
        # cached GPU memory can be handed back. Rebinding to None (rather than
        # `del`) also works when the loader did not yield a single batch.
        losses = img = bbox = label = bbox_ = label_ = scale = None
        torch.cuda.empty_cache()

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=min(opt.test_num, len(testset)))
        print(eval_result)

        # TODO: this definitely is not good and will bias evaluation
        # (the "best" checkpoint is selected on the evaluation set itself).
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'], epoch=epoch)
            print("After epoch {}: snapshotted to {}".format(epoch, best_path))

        trainer.vis.plot('test_map', eval_result['map'])
        del eval_result
        torch.cuda.empty_cache()
# Demo script: run the trained GAN generator on misc/demo.jpg.
opt = parser.parse_args()
FasterRCNNOpt = Config()

UPSCALE_FACTOR = opt.upscale_factor
# True when inference should run on the GPU.
TEST_MODE = opt.test_mode == 'GPU'
MODEL_NAME = opt.model_name

gan_model = Generator(UPSCALE_FACTOR).eval()
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn)

if TEST_MODE:
    gan_model.cuda()
    trainer.cuda()
    weight_location = None  # torch.load default: keep the saved device
else:
    # Remap every storage to the CPU so GPU-trained weights load without CUDA.
    weight_location = lambda storage, loc: storage
gan_model.load_state_dict(
    torch.load('epochs/' + MODEL_NAME, map_location=weight_location))
# The detector weights are currently not loaded:
# trainer.load('epochs/samir_fast_rcnn_epoch60.pth')

image = read_image('misc/demo.jpg')
image = Variable(ToTensor()(image)).unsqueeze(0)
if TEST_MODE:
    image = image.cuda()

# time.clock() was removed in Python 3.8; perf_counter() is its replacement
# for measuring elapsed time. Any later "elapsed" computation has to use
# perf_counter() as well.
start = time.perf_counter()
# `volatile=True` no longer disables autograd; no_grad() is the supported way
# to run inference without building a graph.
with torch.no_grad():
    out = gan_model(image)
out_img = ToPILImage()(out[0].data.cpu())
class PlasticDetector:
    '''
    Runs a trained FasterRCNNVGG16 detection model on PIL images and returns
    or draws the detected bounding boxes.
    '''

    # Detections whose score is not above this value are discarded.
    SCORE_THRESHOLD = 0.7
    # Outline colours used by annotate_image, selected by predicted label
    # (cycled when there are more labels than colours).
    BOX_COLORS = [(255, 0, 0), (0, 255, 0)]

    def __init__(self, model_path, useGPU, n_fg_classes=2):
        '''
        Creates a new detection model using the weights stored in the file MODEL_PATH
        and initializes the GPU if USEGPU is set to true.

        MODEL_PATH: path to a trained detection model.
        USEGPU: if true, the GPU will be used for faster computations.
        N_FG_CLASSES: number of foreground classes the model was built for.

        Side effects: limits torch to one thread and stores MODEL_PATH in the
        shared option object (opt.load_path).
        '''
        torch.set_num_threads(1)
        opt.load_path = model_path
        self.faster_rcnn = FasterRCNNVGG16(n_fg_class=n_fg_classes,
                                           anchor_scales=[1])
        self.trainer = FasterRCNNTrainer(self.faster_rcnn,
                                         n_fg_class=n_fg_classes)
        if useGPU:
            self.trainer = self.trainer.cuda()
        self.trainer.load(model_path)
        self.transforms = transforms.ToTensor()
        self.useGPU = useGPU

    def predict_image(self, img, topk):
        '''
        Detects objects in the provided testing images.

        IMG: PIL image fitting the input of the trained model
        TOPK: the number of bounding boxes to return. We return the most confident
              bounding boxes first.

        RETURNs: (BBOXES, CONFS) where BBOXES is a n x 4 array,
        where each line corresponds to one bounding box. The
        bounding box coordniates are stored in the format
        [x_min, y_min, x_max, y_max], where x corresponds to the width
        and y to the height. CONFS holds one confidence value per row of
        BBOXES, in the same order. Only detections scoring above
        SCORE_THRESHOLD are considered, so fewer than TOPK rows may be returned.
        '''
        pred_bboxes, _, pred_scores = self._run_prediction(img)
        # The columns are reordered to turn each box into
        # [x_min, y_min, x_max, y_max].
        return pred_bboxes[:topk, [1, 0, 3, 2]], pred_scores[:topk]

    def annotate_image(self, img, topk):
        '''
        Detects objects in the provided testing images.

        IMG: PIL image fitting the input of the trained model
        TOPK: the maximum number of bounding boxes to draw. The most confident
              bounding boxes are drawn first.

        RETURNS: IMG: the same PIL image object, with the detected bounding
                 boxes drawn onto it as rectangles (IMG is modified in place).
        '''
        pred_bboxes, pred_labels, _ = self._run_prediction(img)
        draw = PIL.ImageDraw.Draw(img)
        for bbox, label in zip(pred_bboxes[:topk], pred_labels[:topk]):
            color = self.BOX_COLORS[int(label) % len(self.BOX_COLORS)]
            # Hand PIL plain floats in [x_min, y_min, x_max, y_max] order.
            corners = [float(value) for value in bbox[[1, 0, 3, 2]]]
            draw.rectangle(corners, outline=color)
        return img

    @staticmethod
    def _select_detections(bboxes, labels, scores, threshold):
        '''
        Keeps the detections scoring above THRESHOLD, most confident first.

        BBOXES, LABELS, SCORES: arrays with one entry (row) per detection.

        RETURNS: (BBOXES, LABELS, SCORES) filtered and sorted by descending
                 score; detections with equal scores keep their input order.
        '''
        scores = np.asarray(scores)
        keep = scores > threshold
        bboxes, labels, scores = bboxes[keep], labels[keep], scores[keep]
        # mergesort is stable, so ties keep their original order.
        order = np.argsort(-scores, kind='mergesort')
        return bboxes[order], labels[order], scores[order]

    def _run_prediction(self, img):
        '''
        Runs the detector on a single image.

        IMG: PIL image; it is converted to RGB, scaled to [0, 1], normalized
             with data.dataset.caffe_normalize and passed to the model as a
             1xCxHxW tensor.

        RETURNS: (BBOXES, LABELS, SCORES) of the detections scoring above
                 SCORE_THRESHOLD, sorted by descending score. Boxes are in the
                 model's own coordinate order (not yet reordered to x/y).
        '''
        # After convert('RGB') the array is always (H, W, 3), so no special
        # case for single-channel input is needed.
        rgb = np.asarray(img.convert('RGB'), dtype=np.float32)
        # transpose (H, W, C) -> (C, H, W)
        chw = rgb.transpose((2, 0, 1))
        proc_img = data.dataset.caffe_normalize(chw / 255.)
        tensor_img = torch.from_numpy(proc_img).unsqueeze(0)
        if self.useGPU:
            tensor_img = tensor_img.cuda()
        # This preset filters bounding boxes with a score < 0.7
        # and has to be set everytime before using predict()
        self.faster_rcnn.use_preset('visualize')
        pred_bboxes, pred_labels, pred_scores = self.faster_rcnn.predict(
            tensor_img, [(chw.shape[1], chw.shape[2])])
        return self._select_detections(pred_bboxes[0], pred_labels[0],
                                       pred_scores[0], self.SCORE_THRESHOLD)
def train(**kwargs):
    """
    The main entry point for training; trains a FasterRCNN-based detector.

    KWARGS: option overrides; forwarded unchanged to opt._parse().

    When opt.load_path is set, the checkpoint is read first: its class names
    (if stored) are handed to the datasets, its 'best_map' (if stored) is the
    score a new evaluation has to beat, and its 'epoch' (if stored) decides
    where training continues. With opt.validate_only the model is only
    evaluated on the whole test set and the function returns.

    RETURNS: None.
    """
    opt._parse(kwargs)

    # Loading class names from checkpoint, if available.
    # The checkpoint has to be read here because the class names are needed
    # to construct the datasets below.
    if opt.load_path:
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        old_state = torch.load(opt.load_path,
                               map_location=None if opt.use_cuda else 'cpu')
        # Checkpoints written without these entries (e.g. the step snapshots
        # below carry no best_map) must not abort the run with a KeyError.
        class_names = old_state.get('class_names', [])
        best_map = old_state.get('best_map', 0)
    else:
        class_names = []
        best_map = 0
        old_state = None

    print('load data')
    dataset = Dataset(opt, class_names)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, dataset.get_class_names())
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count())
    print('Model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()
    if opt.load_path:
        trainer.load(old_state)
        print_log('load pretrained model from %s' % opt.load_path)

    # Must be bound before the validate-only branch, which passes it to eval().
    global_step = 0

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=num_eval_images)
        print_log('Evaluation finished, obtained {} using {} out of {} images'.
                  format(eval_result, num_eval_images, len(testset)))
        return

    if old_state and 'epoch' in old_state:
        starting_epoch = old_state['epoch'] + 1
        print_log('Model was trained until epoch {}, continuing with epoch {}'.
                  format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    for epoch in range(starting_epoch, opt.num_epochs):
        writer.add_scalar('epoch', epoch, global_step)
        # The exponent counts how many milestones in opt.lr_schedule have
        # been reached, so the rate is decayed once per passed milestone.
        lr_ = opt.lr * (opt.lr_decay**np.sum(
            epoch >= np.array(opt.lr_schedule)))
        trainer.faster_rcnn.set_lr(lr_)
        print_log('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale).item()

            img, label = img.float(), label_.float()
            if opt.use_cuda:
                img, label = img.cuda(), label.cuda()
            img, label = Variable(img), Variable(label)

            if len(bbox_[0]) > 0:
                bbox = bbox_.float()
                if opt.use_cuda:
                    bbox = bbox.cuda()
                bbox = Variable(bbox)
            else:
                # Image without ground-truth boxes: passed on as a plain
                # numpy array, without float/cuda conversion.
                bbox = np.asarray(bbox_)

            losses = trainer.train_step(img, bbox, label, scale)
            writer.add_scalars(
                'training/losses',
                dict(total_loss=losses.total_loss,
                     roi_cls_loss=losses.roi_cls_loss,
                     roi_loc_loss=losses.roi_loc_loss,
                     rpn_cls_loss=losses.rpn_cls_loss,
                     rpn_loc_loss=losses.rpn_loc_loss), global_step)

            if (ii + 1) % opt.plot_every == 0:
                # Creating the file named by opt.debug_file drops a running
                # training job into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names() +
                                     ['BG'])
                writer.add_image('gt_img', gt_img, global_step)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names() +
                                       ['BG'])
                writer.add_image('pred_img', pred_img, global_step)

            if global_step % opt.snapshot_every == 0:
                snapshot_path = trainer.save(
                    epoch=epoch, class_names=testset.get_class_names())
                print_log("Snapshotted to {}".format(snapshot_path))

        # Drop the references to the last batch before evaluating so the
        # cached GPU memory can be handed back. Rebinding to None (rather than
        # `del`) also works when the loader did not yield a single batch.
        losses = img = bbox = label = bbox_ = label_ = scale = None
        torch.cuda.empty_cache()

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=min(opt.test_num, len(testset)))
        print_log(eval_result)

        # TODO: this definitely is not good and will bias evaluation
        # (the "best" checkpoint is selected on the evaluation set itself).
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'],
                                     epoch=epoch,
                                     class_names=testset.get_class_names())
            print_log("After epoch {}: snapshotted to {}".format(
                epoch, best_path))

        del eval_result
        torch.cuda.empty_cache()