def main():
    """Render detections of three pretrained models on four VOC07 test images.

    Produces a grid figure: one row per sample image, one column per model,
    with model names as column titles on the first row.
    """
    dataset = VOCDetectionDataset(year='2007', split='test')
    models = [
        ('Faster R-CNN', FasterRCNNVGG16(pretrained_model='voc07')),
        ('SSD300', SSD300(pretrained_model='voc0712')),
        ('SSD512', SSD512(pretrained_model='voc0712')),
    ]
    indices = [29, 301, 189, 229]

    fig = plot.figure(figsize=(30, 30))
    for row, sample_idx in enumerate(indices):
        for col, (model_name, model) in enumerate(models):
            img, _, _ = dataset[sample_idx]
            bboxes, labels, scores = model.predict([img])
            bbox, label, score = bboxes[0], labels[0], scores[0]

            cell = row * len(models) + col + 1
            ax = fig.add_subplot(len(indices), len(models), cell)
            vis_bbox(
                img, bbox, label, score,
                label_names=voc_detection_label_names, ax=ax)

            # Set MatplotLib parameters
            ax.set_aspect('equal')
            if row == 0:
                # Column headers only on the first row.
                font = FontProperties()
                font.set_family('serif')
                ax.set_title(
                    model_name, fontsize=35, y=1.03, fontproperties=font)
            plot.axis('off')
            plot.tight_layout()

    plot.show()
def setUp(self):
    """Instantiate the dataset under test and record the expected tuple length."""
    self.dataset = VOCDetectionDataset(
        split=self.split,
        year=self.year,
        use_difficult=self.use_difficult,
        return_difficult=self.return_difficult)
    # Each example gains a fourth element (the "difficult" flags)
    # when return_difficult is requested.
    if self.return_difficult:
        self.n_out = 4
    else:
        self.n_out = 3
def main():
    """Evaluate a pretrained detector on the VOC2007 test set.

    Prints the mAP followed by the per-class average precision,
    using the official VOC07 11-point metric.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('faster_rcnn', 'ssd300', 'ssd512'),
        default='ssd300')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()

    # Map the CLI choice to (model class, pretrained weight set);
    # argparse's `choices` guarantees the key exists.
    model_cls, weights = {
        'faster_rcnn': (FasterRCNNVGG16, 'voc07'),
        'ssd300': (SSD300, 'voc0712'),
        'ssd512': (SSD512, 'voc0712'),
    }[args.model]
    model = model_cls(pretrained_model=weights)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    # Keep difficult objects so the VOC07 protocol can ignore them properly.
    dataset = VOCDetectionDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    iterator = iterators.SerialIterator(
        dataset, args.batchsize, repeat=False, shuffle=False)

    pred_bboxes, pred_labels, pred_scores, gt_values = apply_detection_link(
        model, iterator, hook=ProgressHook(len(dataset)))
    gt_bboxes, gt_labels, gt_difficults = gt_values

    eval_ = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)

    print()
    print('mAP: {:f}'.format(eval_['map']))
    for l, name in enumerate(voc_detection_label_names):
        # Classes with no entry in the result get a placeholder.
        if l in eval_:
            print('{:s}: {:f}'.format(name, eval_[l]['ap']))
        else:
            print('{:s}: -'.format(name))
def main():
    """Train Faster R-CNN (VGG16) on VOC2007 trainval; evaluate on VOC2007 test."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(
        split='test', year='2007',
        use_difficult=True, return_difficult=True)

    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(voc_detection_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        # Rescale the image to the network input size, scale the boxes to
        # match, then randomly flip horizontally for augmentation.
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Keep only the final model weights (not the whole trainer state).
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
         'main/rpn_loc_loss', 'main/rpn_cls_loss',
         'validation/main/map']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'], file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval)

    # Validate only at the LR drop and at the end; full VOC eval is slow.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Evaluate a pretrained or user-supplied detector on VOC2007 test.

    Prints the mAP and per-class average precision computed with the
    VOC07 11-point metric.
    """
    import numpy as np  # needed for the NaN test on per-class AP below

    chainer.config.train = False

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('faster_rcnn', 'ssd300', 'ssd512'),
        default='ssd300')
    parser.add_argument('--pretrained_model')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()

    # When a user checkpoint is given, n_fg_class must be supplied (20 VOC
    # classes); otherwise fall back to the published pretrained weights.
    if args.model == 'faster_rcnn':
        if args.pretrained_model:
            model = FasterRCNNVGG16(
                n_fg_class=20, pretrained_model=args.pretrained_model)
        else:
            model = FasterRCNNVGG16(pretrained_model='voc07')
    elif args.model == 'ssd300':
        if args.pretrained_model:
            model = SSD300(
                n_fg_class=20, pretrained_model=args.pretrained_model)
        else:
            model = SSD300(pretrained_model='voc0712')
    elif args.model == 'ssd512':
        if args.pretrained_model:
            model = SSD512(
                n_fg_class=20, pretrained_model=args.pretrained_model)
        else:
            model = SSD512(pretrained_model='voc0712')

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    dataset = VOCDetectionDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    iterator = iterators.SerialIterator(
        dataset, args.batchsize, repeat=False, shuffle=False)

    imgs, pred_values, gt_values = apply_prediction_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)))
    # The images themselves are not needed for scoring; drop the iterator
    # explicitly so they can be freed early.
    del imgs

    pred_bboxes, pred_labels, pred_scores = pred_values
    gt_bboxes, gt_labels, gt_difficults = gt_values

    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)

    print()
    print('mAP: {:f}'.format(result['map']))
    for l, name in enumerate(voc_detection_label_names):
        ap = result['ap'][l]
        # eval_detection_voc marks classes with no ground truth as NaN.
        # The previous truthiness test (`if result['ap'][l]:`) was wrong on
        # both edges: NaN is truthy (printed "nan"), and a legitimate AP of
        # exactly 0.0 is falsy (printed "-"). Test for NaN explicitly.
        if not np.isnan(ap):
            print('{:s}: {:f}'.format(name, ap))
        else:
            print('{:s}: -'.format(name))
import numpy as np

import chainer

from chainercv.datasets import VOCDetectionDataset
from chainercv.extensions import DetectionVisReport
from chainercv.wrappers import bbox_resize_hook
from chainercv.wrappers import output_shape_soft_min_hard_max
from chainercv.wrappers import ResizeWrapper
from chainercv.wrappers import SubtractWrapper

from faster_rcnn import FasterRCNN


if __name__ == '__main__':
    # Wrap the dataset with preprocessing steps: mean subtraction (BGR
    # order), then a resize keeping the short side at 600 and capping the
    # long side at 1200, with bounding boxes rescaled to match.
    test_data = VOCDetectionDataset(
        mode='train', use_cache=True, year='2007', bgr=True)
    preprocessing = [
        lambda d: SubtractWrapper(
            d, value=np.array([103.939, 116.779, 123.68])),
        lambda d: ResizeWrapper(
            d, preprocess_idx=0,
            output_shape=output_shape_soft_min_hard_max(600, 1200),
            hook=bbox_resize_hook(1)),
    ]
    for wrap in preprocessing:
        test_data = wrap(test_data)

    model = FasterRCNN()
    chainer.serializers.load_npz('VGG16_faster_rcnn_final.model', model)
def main():
    """Train SSD300/SSD512 on VOC07+12 trainval; evaluate on VOC07 test."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(voc_detection_label_names),
            pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(voc_detection_label_names),
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Train on the union of VOC2007 and VOC2012 trainval.
    train = TransformDataset(
        ConcatenatedDataset(
            VOCDetectionDataset(year='2007', split='trainval'),
            VOCDetectionDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize)

    test = VOCDetectionDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Bias parameters: gradient scaled x2, no weight decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(
            [80000, 100000], 'iteration'))

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=voc_detection_label_names),
        trigger=(10000, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
# Side-by-side demo figure: VOC detection boxes (left) and VOC semantic
# segmentation labels overlaid on the image (right).
#
# Fix: the script used VOCDetectionDataset, vis_bbox and
# voc_detection_label_names without importing them, which raised NameError
# at runtime; the missing imports are added below.
from chainercv.datasets import VOCDetectionDataset
from chainercv.datasets import voc_detection_label_names
from chainercv.datasets import VOCSemanticSegmentationDataset
from chainercv.datasets \
    import voc_semantic_segmentation_label_colors
from chainercv.datasets \
    import voc_semantic_segmentation_label_names
from chainercv.visualizations import vis_bbox
from chainercv.visualizations import vis_image
from chainercv.visualizations import vis_label

import matplotlib.pyplot as plot


fig = plot.figure(figsize=(26, 10))
ax1 = fig.add_subplot(1, 2, 1)
plot.axis('off')
ax2 = fig.add_subplot(1, 2, 2)
plot.axis('off')

dataset = VOCDetectionDataset()
img, bbox, label = dataset[310]
vis_bbox(img, bbox, label,
         label_names=voc_detection_label_names, ax=ax1)

dataset = VOCSemanticSegmentationDataset()
img, label = dataset[30]
vis_image(img, ax=ax2)
_, legend_handles = vis_label(
    label,
    label_names=voc_semantic_segmentation_label_names,
    label_colors=voc_semantic_segmentation_label_colors,
    alpha=0.9, ax=ax2)
# ax2.legend(handles=legend_handles, bbox_to_anchor=(1, 1), loc=2)
# NOTE(review): the first part of this line is the tail of a bbox-drawing
# helper whose `def` lies outside this chunk (it references `xy`, `width`,
# `height`, `ax`, `bbox` and `label_names` from unseen context), so its
# enclosing structure cannot be reconstructed safely here. The remainder is
# a `__main__` demo that plots ground-truth boxes for dataset items in
# groups of four. Kept byte-identical.
plt.Rectangle(xy, width, height, fill=False, edgecolor='red', linewidth=3)) if label_names is not None: ax.text(bbox[0], bbox[1], label_names[bbox[4].astype(np.int)], style='italic', bbox={ 'facecolor': 'white', 'alpha': 0.7, 'pad': 10 }) if __name__ == '__main__': from chainercv.datasets import VOCDetectionDataset dataset = VOCDetectionDataset() for i in range(0, 100, 4): fig = plt.figure(figsize=(14, 14)) for j in range(4): ax = fig.add_subplot(2, 2, j + 1) img, bboxes = dataset.get_raw_data(i + j) vis_img_bbox(img, bboxes, dataset.labels, ax) plt.show()
# NOTE(review): the first part of this line is the tail of a report-rendering
# method whose `def` lies outside this chunk (it references `self.dataset`,
# `fig`, `vis_img`, `raw_bboxes`, `bboxes` and `out_file` from unseen
# context), so its enclosing structure cannot be reconstructed safely here.
# The remainder is a `__main__` smoke test driving DetectionVisReport with a
# mock trainer and a constant-return model. Kept byte-identical.
label_names = getattr(self.dataset, 'labels', None) vis_img_bbox(vis_img, raw_bboxes, label_names=label_names, ax=ax_gt) ax_pred = fig.add_subplot(2, 1, 2) ax_pred.set_title('prediction') vis_img_bbox(vis_img, bboxes, label_names=label_names, ax=ax_pred) plt.savefig(out_file) if __name__ == '__main__': from chainercv.datasets import VOCDetectionDataset from chainercv.testing import ConstantReturnModel import mock import tempfile train_data = VOCDetectionDataset(mode='train', use_cache=True, year='2007') _, bbox = train_data.get_example(3) model = ConstantReturnModel(bbox[None]) trainer = mock.MagicMock() out_dir = tempfile.mkdtemp() print('outdir ', out_dir) trainer.out = out_dir trainer.updater.iteration = 0 extension = DetectionVisReport([3], train_data, model) extension(trainer)
def main(gpu=-1, epoch=100, batch_size=1, lr=5e-4, out='result'):
    """Train Faster R-CNN on VOC2007 train, visualizing progress periodically.

    Args:
        gpu: GPU device id, or -1 for CPU.
        epoch: number of training epochs.
        batch_size: kept for interface compatibility (iterators use 1).
        lr: kept for interface compatibility (the MomentumSGD line using it
            is commented out; Adam with fixed alpha is used instead).
        out: trainer output directory.
    """
    train_data = VOCDetectionDataset(mode='train', use_cache=True,
                                     year='2007')
    test_data = VOCDetectionDataset(mode='val', use_cache=True, year='2007')

    def transform(in_data):
        # Mean-subtract, resize so the short side is at least 600 (long side
        # capped at 1200), and randomly flip horizontally; bounding boxes are
        # transformed to stay aligned with the image.
        img, bbox = in_data
        img -= np.array([103.939, 116.779, 123.68])[:, None, None]
        # Resize bounding box to a shape
        # with the smaller edge at least at length 600
        input_shape = img.shape[1:]
        output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
        img = transforms.resize(img, output_shape)
        bbox = transforms.bbox_resize(bbox, input_shape, output_shape)

        # horizontally flip
        img, flips = transforms.random_flip(
            img, horizontal_flip=True, return_flip=True)
        h_flip = flips['h']
        bbox = transforms.bbox_flip(bbox, output_shape, h_flip)
        return img, bbox

    transforms.extend(train_data, transform)
    transforms.extend(test_data, transform)

    model = FasterRCNN(gpu=gpu)
    if gpu != -1:
        model.to_gpu(gpu)
        chainer.cuda.get_device(gpu).use()
    # optimizer = chainer.optimizers.MomentumSGD(lr=lr)
    optimizer = chainer.optimizers.Adam(
        alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # FIX: the training iterator was built from `test_data`, so the model
    # was being optimized on the validation split. Use `train_data`.
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    updater = ParallelUpdater(train_iter, optimizer, devices={'main': gpu})
    # updater = chainer.training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    log_interval = 20, 'iteration'
    val_interval = 3000, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/time',
         'main/rpn_loss_cls', 'main/rpn_loss_bbox',
         'main/loss_cls', 'main/loss_bbox']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # visualize training
    trainer.extend(
        extensions.PlotReport(['main/rpn_loss_cls'],
                              file_name='rpn_loss_cls.png'),
        trigger=log_interval)
    trainer.extend(
        extensions.PlotReport(['main/rpn_loss_bbox'],
                              file_name='rpn_loss_bbox.png'),
        trigger=log_interval)
    trainer.extend(
        extensions.PlotReport(['main/loss_cls'],
                              file_name='loss_cls.png'),
        trigger=log_interval)
    trainer.extend(
        extensions.PlotReport(['main/loss_bbox'],
                              file_name='loss_bbox.png'),
        trigger=log_interval)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 data of the set
            train_data,
            model,
            filename_base='detection_train',
            predict_func=model.predict_bboxes),
        trigger=val_interval, invoke_before_training=True)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 data of test_data
            test_data,
            model,
            # FIX: this call passed the callable as `forward_func=` while the
            # sibling call above used `predict_func=`; unified on the latter.
            predict_func=model.predict_bboxes),
        trigger=val_interval, invoke_before_training=True)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train Faster R-CNN (VGG16) on VOC07 or VOC07+12; evaluate on VOC07 test."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Pick the training split; evaluation always uses VOC2007 test.
    if args.dataset == 'voc07':
        train_data = VOCDetectionDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCDetectionDataset(year='2007', split='trainval'),
            VOCDetectionDataset(year='2012', split='trainval'))
    test_data = VOCDetectionDataset(
        split='test', year='2007',
        use_difficult=True, return_difficult=True)

    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(voc_detection_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Only the model weights are snapshotted, at the very end of training.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
         'main/rpn_loc_loss', 'main/rpn_cls_loss',
         'validation/main/map']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'], file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval)

    # Run the (slow) full VOC evaluation at the LR drop and at the end.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()